diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000000..5c34ec47cf --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,37 @@ +{ + "name": "gem5 Development Container", + "image": "ghcr.io/gem5/devcontainer:latest", + "hostRequirements": { + "cpus": 8, + "memory": "16gb", + "storage": "32gb" + }, + "customizations": { + "vscode": { + "extensions": [ + "eamodio.gitlens", + "GitHub.copilot", + "GitHub.copilot-chat", + "GitHub.vscode-pull-request-github", + "ms-python.debugpy", + "ms-python.isort", + "ms-python.python", + "ms-python.vscode-pylance", + "ms-vscode.cpptools", + "ms-vscode.cpptools-extension-pack", + "ms-vscode.cpptools-themes", + "ms-vscode.makefile-tools", + "ms-vscode-remote.remote-containers", + "Tsinghua-Hexin-Joint-Institute.gem5-slicc", + "VisualStudioExptTeam.vscodeintellicode" + ] + } + }, + "features": { + "ghcr.io/devcontainers/features/docker-in-docker:2": {}, + "ghcr.io/devcontainers/features/github-cli:1": {}, + "ghcr.io/devcontainers-contrib/features/actionlint:1": {}, + "ghcr.io/devcontainers-contrib/features/vscode-cli:1": {} + }, + "onCreateCommand": "./.devcontainer/on-create.sh" +} diff --git a/.devcontainer/on-create.sh b/.devcontainer/on-create.sh new file mode 100755 index 0000000000..77f642c32b --- /dev/null +++ b/.devcontainer/on-create.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# Copyright (c) 2024 The Regents of the University of California +# All Rights Reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This script is run when the Docker container specified in devcontainer.json +# is created. + +set -e + +# Refresh the git index. +git update-index + +# Install the pre-commit checks. 
+./util/pre-commit-install.sh diff --git a/.gitignore b/.gitignore index d1904756d2..36ba603fb6 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,4 @@ configs/dram/lowp_sweep.cfg .pyenv .vscode typings +.DS_Store diff --git a/.mailmap b/.mailmap index 49c438d3eb..1ce4d098c6 100644 --- a/.mailmap +++ b/.mailmap @@ -1,8 +1,11 @@ Abdul Mutaal Ahmad adarshpatil +Aditya K Kamath aditya Adrià Armejach Adrià Armejach +Adrià Armejach Adrià Armejach <66964292+aarmejach@users.noreply.github.com> Adrian Herrera Adrien Pesle +Adwaith R Krishna Akash Bagdia Akash Bagdia Alec Roelke Alec Roelke Alexander Klimov @@ -10,21 +13,19 @@ Alexandru Dutu Alexandru Alex Richardson Ali Jafri Ali Saidi Ali Saidi -Ali Saidi Ali Saidi Ali Saidi Ali Saidi Alistair Delva +Alvaro Moreno Amin Farmahini Anders Handler Andrea Mondelli Andrea Mondelli -Andrea Mondelli Andrea Mondelli Andrea Pellegrini Andreas Hansson Andreas Hansson Andreas Hansson Andreas Hansson -Andreas Hansson Andreas Hansson Andreas Hansson Andreas Hansson Andreas Sandberg Andreas Sandberg -Andreas Sandberg Andreas Sandberg Andreas Sandberg Andreas Sandberg +Andreas Sandberg Andreas Sandberg Andrew Bardsley Andrew Bardsley Andrew Lukefahr Andrew Schultz @@ -32,11 +33,14 @@ Andriani Mappoura Angie Lee Anis Peysieux Ani Udipi +anoop Anouk Van Laer ARM gem5 Developers Arthur Perais Arthur Perais Arun Rodrigues Ashkan Tousi +atrah22 +Atri Bhattacharyya Austin Harris Austin Harris Avishai Tvila Ayaz Akram @@ -48,6 +52,7 @@ Bjoern A. Zeeb Blake Hechtman Blake Hechtman Blake Hechtman Blake Hechtman ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) Bobby R. Bruce Bobby Bruce +Bobby R. 
Bruce Bobby Bruce Boris Shingarov Boris Shingarov Brad Beckmann Brad Beckmann Brad Beckmann Brad Beckmann ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) @@ -60,15 +65,13 @@ Brian Grayson Cagdas Dirik cdirik Carlos Falquez Chander Sudanthi Chander Sudanthi -Chander Sudanthi Chander Sudanthi Charles Jamieson -CHEN Meng +Chen Meng Chen Zou Chia-You Chen -Chow, Marcus +Marcus Chow Chris Adeniyi-Jones Chris Emmons Chris Emmons -Chris Emmons Chris Emmons Chris January Christian Menard Christian Menard Christopher Torng @@ -83,17 +86,19 @@ Daecheol You Dam Sunwoo Dan Gibson Daniel Carvalho Daniel +Daniel Carvalho Daniel Carvalho Daniel Carvalho Daniel R. Carvalho Daniel Gerzhoy Daniel Johnson +Daniel Kouchekinia Daniel Sanchez Davide Basilio Bartolini David Guillen-Fandos David Guillen David Guillen-Fandos David Guillen Fandos David Hashe David Hashe David Oehmke -David Schall -Derek Christ +David Schall David Schall +Derek Christ Derek Christ <44267643+derchr@users.noreply.github.com> Derek Hower Deyaun Guo Deyuan Guo ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) Deyaun Guo Deyuan Guo @@ -107,11 +112,12 @@ Earl Ou eavivi Éder F. 
Zulian Edmund Grimley Evans -Eduardo José Gómez Hernández +Eduardo José Gómez Hernández Eduardo José Gómez Hernández Eliot Moss Emilio Castillo Emilio Castillo Emilio Castillo Emilio Castillo ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) Emily Brickey +Emin Gadzhiev Erfan Azarkhish Erhu Eric Van Hensbergen Eric Van Hensbergen @@ -125,11 +131,12 @@ Gabe Black Gabe Black Gabe Black Gabe Black Gabe Loh gloh Gabor Dozsa -Gabriel Busnot +Gabriel Busnot Gabriel Busnot +Gabriel Busnot Gabriel Busnot gauravjain14 +Gautham Pathak Gedare Bloom Gedare Bloom Gene Wu Gene WU -Gene WU Gene Wu Geoffrey Blake Geoffrey Blake Geoffrey Blake Geoffrey Blake Georg Kotheimer @@ -140,10 +147,14 @@ GWDx Hamid Reza Khaleghzadeh Hamid Reza Khaleghzadeh ext:(%2C%20Lluc%20Alvarez%20%3Clluc.alvarez%40bsc.es%3E%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) handsomeliu Hanhwi Jang -Hoa Nguyen +Harshil Patel Harshil Patel +Harshil Patel Harshil Patel <91860903+Harshil2107@users.noreply.github.com> +Wenjian He +HJikram +Hoa Nguyen Hoa Nguyen Hongil Yoon Hsuan Hsu -huangjs +hungweihsu hungweihsuG <145444687+hungweihsuG@users.noreply.github.com> Hussein Elnawawy Ian Jiang IanJiangICT @@ -152,9 +163,13 @@ Iru Cai Isaac Richter Isaac Sánchez Barrera Ivan Pizarro -Jack Whitham Jack Whitman +Ivan Turasov +Ivana Mitrovic Ivana Mitrovic +Ivana Mitrovic ivanaamit +Jack Whitham Jairo Balart Jakub Jermar +James Braun James Clarkson Jan-Peter Larsson Jan Vrany @@ -174,8 +189,8 @@ Jayneel Gandhi Jennifer Treichler Jerin Joy Jiajie Chen -Jiasen Huang -Jiasen +Jiasen Huang Jiasen +Jiasen Huang huangjs Jiayi Huang jiegec Jieming Yin jiemingyin @@ -188,14 +203,17 @@ Joel Hestness Joel Hestness Joël Porquet-Lupine John Alsop John Kalamatianos jkalamat +Johnny Jordi Vaquero Jose Marinho Juan M. 
Cebrian Jui-min Lee -kai.ren Kai Ren +Kai Ren kai.ren +Kai Ren Kai Ren +KaiBatley <68886332+KaiBatley@users.noreply.github.com> Kanishk Sugand Karthik Sangaiah -Kaustav Goswami +Kaustav Goswami Kaustav Goswami <39310478+kaustav-goswami@users.noreply.github.com> Kelly Nguyen Ke Meng Kevin Brodsky @@ -206,11 +224,16 @@ Koan-Sin Tan Korey Sewell Krishnendra Nathella Krishnendra Nathella ksco -kunpai +Kunal Pai Kunal Pai <62979320+kunpai@users.noreply.github.com> +Kunal Pai kunpai +Kunal Pai paikunal +Kunal Pai KUNAL PAI Kyle Roarty Kyle Roarty Laura Hinman Lena Olson Lena Olson Lena Olson Lena Olson +Leo Redivo leoredivo <94771718+leoredivo@users.noreply.github.com> +Lingkang Lisa Hsu Lisa Hsu Lluc Alvarez Lluís Vilanova Lluis Vilanova @@ -221,9 +244,11 @@ Mahyar Samani Majid Jalili Malek Musleh Nilay Vaish ext:(%2C%20Malek%20Musleh%20%3Cmalek.musleh%40gmail.com%3E) Marc Mari Barcelo -Marco Balboni -Marco Elver Marco Elver Marc Orr Marc Orr +Marco Balboni +Marco Chen +Marco Elver Marco Elver +Marco Kurzynski Marjan Fariborz marjanfariborz Mark Hildebrand Marton Erdos @@ -233,20 +258,18 @@ Matteo Andreozzi Matteo Andreozzi Matt Evans Matt Evans Matthew Poremba Matthew Poremba +Matthias Boettcher Matthias Hille -Matthias Jung -Matthias Jung -Matt Horsnell Matt Horsnell +Matthias Jung Matthias Jung Matt Horsnell Matt Horsnell -Matt Horsnell Matt Horsnell Matt Poremba Matt Poremba -Matt Sinclair Matthew Sinclair -Matt Sinclair Matt Sinclair +Matt Sinclair Matt Sinclair +Matt Sinclair Matthew Sinclair Maurice Becker Maxime Martinasso Maximilian Stein Maximilian Stein Maximilien Breughe Maximilien Breughe -Melissa Jost +Melissa Jost Melissa Jost <50555529+mkjost0@users.noreply.github.com> Michael Adler Michael Boyer Michael LeBeane Michael LeBeane @@ -262,7 +285,6 @@ Min Kyu Jeong Min Kyu Jeong Mitch Hayenga Mitchell Hayenga Mitch Hayenga Mitch Hayenga ext:(%2C%20Amin%20Farmahini%20%3Caminfar%40gmail.com%3E) Mitch Hayenga Mitch Hayenga -Mitch Hayenga Mitch Hayenga Mitch 
Hayenga Mitch Hayenga Mohammad Alian Monir Mozumder @@ -279,13 +301,17 @@ Nathan Binkert Nathan Binkert Nayan Deshmukh Neha Agarwal Neil Natekar -Nicholas Lindsay +Nicholas Lindsay Nicholas Lindsay +Nicholas Mosier Nicholas Mosier Nicolas Boichat Nicolas Derumigny Nicolas Zea +Nikolaos Kyparissas Nikos Nikoleris Nikos Nikoleris Nilay Vaish ext:(%2C%20Timothy%20Jones%20%3Ctimothy.jones%40cl.cam.ac.uk%3E) Nils Asmussen Nils Asmussen +Nitesh Narayana +Nitish Arya <42148385+aryanitish@users.noreply.github.com> Noah Katz ntampouratzis Nuwan Jayasena @@ -293,7 +319,6 @@ Ola Jeppsson Omar Naji Onur Kayiran Pablo Prieto -paikunal Palle Lyckegaard Pau Cabre Paul Rosenfeld Paul Rosenfeld @@ -308,29 +333,39 @@ Po-Hao Su Polina Dudnik Polina Dudnik Polydoros Petrakis Pouya Fotouhi Pouya Fotouhi +Prajwal Hegde Prakash Ramrakhyani Prakash Ramrakhani Prakash Ramrakhyani Prakash Ramrakhyani Pritha Ghoshal +Pu (Luke) Yi Quentin Forcioli Radhika Jagtap Radhika Jagtap Rahul Thakur -Reiley Jeapaul +Rajarshi Das +Ranganath (Bujji) Selagamsetty BujSet +Razeza +Reiley Jeapaul Reiley Jeapaul Rekai Gonzalez-Alberquilla Rekai Gonzalez Alberquilla -Rekai Gonzalez-Alberquilla Rekai Gonzalez Alberquilla Rekai Gonzalez-Alberquilla Rekai Gonzalez-Alberquilla Rekai Gonzalez-Alberquilla Rekai Rene de Jong Ricardo Alves Richard Cooper -Richard D. Strong +Richard Strong Richard D. 
Strong Richard Strong Richard Strong Richard Strong Richard Strong Richard Strong Rick Strong Rico Amslinger Riken Gohil Rizwana Begum +Robert Hauser <85344819+robhau@users.noreply.github.com> Robert Kovacsics Robert Scheffel Robert +Rocky Tatiefo +Roger Chang rogerchang23424 +Roger Chang rogerchang23424 <32214817+rogerchang23424@users.noreply.github.com> +Roger Chang rogerchang23424 +Roger Chang Yu-Cheng Chang Rohit Kurup Ron Dreslinski Ronald Dreslinski Ruben Ayrapetyan @@ -342,23 +377,21 @@ sacak32 Sampad Mohapatra Samuel Grayson Samuel Stark -Sandipan Das <31861871+sandip4n@users.noreply.github.com> Sandipan Das Sandipan Das <31861871+sandip4n@users.noreply.github.com> Santi Galan Sascha Bischoff Sascha Bischoff -Sascha Bischoff Sascha Bischoff +Saúl Adserias <33020671+saul44203@users.noreply.github.com> Sean McGoogan Sean Wilson Sergei Trofimov Severin Wischmann Severin Wischmann ext:(%2C%20Ioannis%20Ilkos%20%3Cioannis.ilkos09%40imperial.ac.uk%3E) Shawn Rosti Sherif Elhabbal -Shivani Parekh -Shivani +Shivani Parekh Shivani Siddhesh Poyarekar +Simon Park Somayeh Sardashti -Sooraj Puthoor -Sooraj Puthoor +Sooraj Puthoor Sooraj Puthoor Sophiane Senni Soumyaroop Roy Srikant Bharadwaj @@ -370,7 +403,6 @@ Steve Raasch Steve Reinhardt Steve Reinhardt ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E%2C%20Ali%20Saidi%20%3CAli.Saidi%40ARM.com%3E) Steve Reinhardt Steve Reinhardt Steve Reinhardt Steve Reinhardt -Steve Reinhardt Steve Reinhardt Stian Hvatum Sudhanshu Jha Sujay Phadke @@ -378,16 +410,18 @@ Sungkeun Kim Swapnil Haria Swapnil Haria Taeho Kgil Tao Zhang +Thilo Vörtler root Thomas Grass Tiago Mück Tiago Muck +Tiberiu Bucur <36485854+TiberiuBucur@users.noreply.github.com> Tim Harris Timothy Hayes Timothy M. Jones Timothy Jones Timothy M. Jones Timothy M. Jones Timothy M. Jones Timothy M. 
Jones Tom Jablin -Tommaso Marinelli Tom Rollet +Tommaso Marinelli Tong Shen Tony Gutierrez Anthony Gutierrez Travis Boraten @@ -401,6 +435,7 @@ Victor Garcia Vilas Sridharan Vincentius Robby Vince Weaver +Vishnu Ramadas vramadas95 vsoria Wade Walker @@ -409,14 +444,16 @@ Weiping Liao Wende Tan Wendy Elsasser William Wang William Wang -William Wang William Wang Willy Wolff Wing Li +wmin0 Xiangyu Dong Xianwei Zhang Xianwei Zhang Xiaoyu Ma Xin Ouyang Xiongfei +Xuan Hu +Yan Lee Yasuko Eckert Yen-lin Lai Yifei Liu @@ -426,7 +463,10 @@ Yuan Yao Yuetsu Kodama yuetsu.kodama Yu-hsin Wang Zhang Zheng -Zhantong Qiu +Zhantong Qiu studyztp Zhengrong Wang seanzw +Zhengrong Wang Zhengrong Wang zhongchengyong Zicong Wang +Zixian Cai <2891235+caizixian@users.noreply.github.com> +zmckevitt diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index d99e7226f3..c2c6b382e0 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,3 +1,161 @@ +# Version 24.0 + +gem5 Version 24.0 is the first major release of 2024. +During this time there have been 298 pull requests merged, comprising of over 600 commits, from 56 unique contributors. + +## API and user-facing changes + +* The GCN3 GPU model has been removed in favor of the newer VEGA_X85 GPU model. +* gem5 now supports building, running, and simulating Ubuntu 24.04. + +### Compiler and OS support + +As of this release gem5 support Clang version 6 to 16 and GCC version 10 to 13. +While other compilers and versions may work, they are not regularly tested. + +gem5 now supports building, running, and simulating on Ubuntu 24.04. +We continue to support 22.04 with 20.04 being deprecated in the coming year. +The majority of our testing is done on Ubuntu LTS systems though Apple Silicon machines and other Linux distributions have also been used regularly during development. +Improvements have been made to ensure a wider support of operating systems. 
+ +## New features + +### gem5 MultiSim: Multiprocessing for gem5 + +The gem5 "MultiSim" module allows for multiple simulations to be run from a single gem5 execution via a single gem5 configuration script. +This allows for multiple simulations to be run in parallel in a structured manner. + +To use MultiSim first create multiple simulators and add them to the MultiSim with the `add_simulator` function. +If needed, limit the maximum number of parallel processes with the `set_num_processes` function. +Then run the simulations in parallel with the `gem5` binary using `-m gem5.utils.multisim`. + +Here is an example of how to use MultiSim: + +```python +import gem5.util.multisim as multisim + +# Set the maximum number of processes to run in parallel +multisim.set_num_processes(4) + +# Create multiple simulators. +# In this case, one for each workload in the benchmark suite. +for workload in benchmark_suite: + board = X86Board( + # ... + ) + board.set_workload(workload) + + # Useful to set the ID here. This is used to create unique output + # directorires for each gem5 process and can be used to idenfify and + # run gem5 processes individually. + simulator = Simulator(board, id=f"{workload.get_id()}") + multisim.add_simulator(simulator) +``` + +Then to run the simulations in parallel: + +```sh + -m gem5.utils.multisim +``` + +The output directory ("m5out" by default) will contain sub-directories for each simulation run. +The sub-directory will be named after the simulator ID set in the configuration script. +We therefore recommend setting the simulator ID to something meaningful to help identify the output directories (i.e., the workload run or something identifying the meaningful characteristics of the simulated system in comparison to others). + +If only one simulation specified in the config needs run, you can do so with: + +```sh + --list # Lists the simulations by ID + + # Run the simulation with the specified ID. 
+``` + +Example scripts of using MultiSim can be found in "configs/example/gem5_library/multisim". + + +### RISC-V Vector Extension Support + +There have been significant improvements to the RVV support in gem5 including + +* Fixed viota (#1137) +* Fixed vrgather (#1134) +* Added RVV FP16 support (#1123) +* Fixed widening and narrowing instructions (#1079) +* Fixed bug in vfmv.f.s (#863) +* Add unit stride segment loads and stores (#851) (#913) +* Fix vl in masked load/store (#830) +* Add unit-stride loads (#794) +* Fix many RVV instructions (#814) (#805) (#715) + +### General RISC-V bugfixes + +* Fixed problem in TLB lookup (#1264) +* Fixed sign-extended branch target (#1173) +* Fixed compressed jump instructions (#1163) +* Fixed GDB connection (#1152) +* Fixed CSR behavior (#1099) +* Add Integer conditional operations Zicond (#1078) +* Add RISC-V Semihosting support (#681) +* Added more detailed instruction types (#589) +* Fixed 32-bit m5op arguments (#900) +* Fixed c.fswsp and c.fsw (#998) (#1005) +* Update PLIC implementation (#886) +* Fix fflags behavior in O3 (#868) +* Add support for local interrupts (#813) +* Removebit 63 of physical address (#756) + +## Improvements + +* Added an new generator which can generate requests based on [spatter](https://github.com/hpcgarage/spatter) patterns. +* KVM is now supported in the gem5 Standard Library ARM Board. +* Generic Cache template added to the Standard Library: https://github.com/gem5/gem5/pull/745 +* Support added for partitioning caches. +* The Standard Library `obtain_resources` function can request multiple resources at once thus reducing delay associated with multiple requests. +* An official gem5 DevContainer has been added to the gem5 repository. +This can be used to build and run gem5 in consistent environment and enables GitHub Codespaces support. + +### gem5 Python Statistics + +The gem5 Python statistics API has been improved. 
+The gem5 Project's general intent with this improvement is make it easier and more desirable to obtain and interact with gem5 simulation statistics via Python. + +For example, the following code snippet demonstrates how to obtain statistics from a gem5 simulation: + +```python +from m5.stats.gem5stats import get_simstat + +## Setup and run the configuation ... +simstat = get_simstat(board) + +# Print the number of cycles the CPU at index 0 has executed. +print(simstat.cpu[0].numCycles) + +# Strings can also be used to access statistics. +print(simstat['cpu'][0]['numCycles']) + +# Print the total number of cycles executed by all CPUs. +print(sum(simstat.cpu[i].numCycles for i in range(len(simstat.cpu)))) +``` + +We hope the usage of the gem5 Python statistics API will be more intuitive and easier to use while allowing better processing of statistical data. + +### GPU Model + +* Support for MI300X and MI200 GPU models including their features and most instructions. +* ROCm 6.1 disk image and compile docker files have been added. ROCm 5.4.2 and 4.2 resources are removed. +* The deprecated GCN3 ISA has been removed. Use VEGA instead. + +## Bug Fixes + +* An integer overflow error known to affect the `AddrRange` class has been fixed. +* Fix fflags behavior of floating point instruction in RISC-V for Out-of-Order CPUs. + +### Arm FEAT_MPAM Support + +An initial implementation of FEAT_MPAM has been introduced in gem5 with the capability to statically partition +classic caches. Guidance on how to use this is available on a Arm community [blog post](https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/gem5-cache-partitioning) + + # Version 23.1 gem5 Version 23.1 is our first release where the development has been on GitHub. 
diff --git a/SConstruct b/SConstruct index ef4d154312..fa6e05177a 100755 --- a/SConstruct +++ b/SConstruct @@ -117,6 +117,8 @@ AddOption('--no-compress-debug', action='store_true', help="Don't compress debug info in build files") AddOption('--with-lto', action='store_true', help='Enable Link-Time Optimization') +AddOption('--with-libcxx', action='store_true', + help='Use libc++ as the C++ standard library (requires Clang)') AddOption('--verbose', action='store_true', help='Print full tool command lines') AddOption('--without-python', action='store_true', @@ -550,11 +552,6 @@ for variant_path in variant_paths: env.Append(CCFLAGS=['-pipe']) env.Append(CCFLAGS=['-fno-strict-aliasing']) - # Enable -Wall and -Wextra and then disable the few warnings that - # we consistently violate - env.Append(CCFLAGS=['-Wall', '-Wundef', '-Wextra', - '-Wno-sign-compare', '-Wno-unused-parameter']) - # We always compile using C++17 env.Append(CXXFLAGS=['-std=c++17']) @@ -567,6 +564,16 @@ for variant_path in variant_paths: with gem5_scons.Configure(env) as conf: conf.CheckLinkFlag('-Wl,--as-needed') + want_libcxx = GetOption('with_libcxx') + if want_libcxx: + with gem5_scons.Configure(env) as conf: + # Try using libc++ if it supports the library. + code = '#include \nint main() { return 0; }' + if (not conf.CheckCxxFlag('-stdlib=libc++') or + not conf.CheckLinkFlag('-stdlib=libc++', code=code) + ): + error('Requested libc++ but it is not usable') + linker = GetOption('linker') if linker: with gem5_scons.Configure(env) as conf: @@ -597,6 +604,13 @@ for variant_path in variant_paths: env.Append(LINKFLAGS=['-Wl,--no-keep-memory']) else: error("Unable to use --no-keep-memory with the linker") + + # Treat warnings as errors but white list some warnings that we + # want to allow (e.g., deprecation warnings). 
+ env.Append(CCFLAGS=['-Werror', + '-Wno-error=deprecated-declarations', + '-Wno-error=deprecated', + ]) else: error('\n'.join(( "Don't know what compiler options to use for your compiler.", @@ -612,8 +626,8 @@ for variant_path in variant_paths: "src/SConscript to support that compiler."))) if env['GCC']: - if compareVersions(env['CXXVERSION'], "7") < 0: - error('gcc version 7 or newer required.\n' + if compareVersions(env['CXXVERSION'], "10") < 0: + error('gcc version 10 or newer required.\n' 'Installed version:', env['CXXVERSION']) # Add the appropriate Link-Time Optimization (LTO) flags if @@ -637,17 +651,6 @@ for variant_path in variant_paths: '-fno-builtin-malloc', '-fno-builtin-calloc', '-fno-builtin-realloc', '-fno-builtin-free']) - if compareVersions(env['CXXVERSION'], "9") < 0: - # `libstdc++fs`` must be explicitly linked for `std::filesystem`` - # in GCC version 8. As of GCC version 9, this is not required. - # - # In GCC 7 the `libstdc++fs`` library explicit linkage is also - # required but the `std::filesystem` is under the `experimental` - # namespace(`std::experimental::filesystem`). - # - # Note: gem5 does not support GCC versions < 7. - env.Append(LIBS=['stdc++fs']) - elif env['CLANG']: if compareVersions(env['CXXVERSION'], "6") < 0: error('clang version 6 or newer required.\n' @@ -665,7 +668,7 @@ for variant_path in variant_paths: env.Append(TCMALLOC_CCFLAGS=['-fno-builtin']) - if compareVersions(env['CXXVERSION'], "11") < 0: + if not want_libcxx and compareVersions(env['CXXVERSION'], "11") < 0: # `libstdc++fs`` must be explicitly linked for `std::filesystem`` # in clang versions 6 through 10. # @@ -679,7 +682,7 @@ for variant_path in variant_paths: # On Mac OS X/Darwin we need to also use libc++ (part of XCode) as # opposed to libstdc++, as the later is dated. 
- if sys.platform == "darwin": + if not want_libcxx and sys.platform == "darwin": env.Append(CXXFLAGS=['-stdlib=libc++']) env.Append(LIBS=['c++']) @@ -688,20 +691,26 @@ for variant_path in variant_paths: if GetOption('with_ubsan'): sanitizers.append('undefined') if GetOption('with_asan'): - # Available for gcc >= 5 or llvm >= 3.1 both a requirement - # by the build system - sanitizers.append('address') - suppressions_file = Dir('util').File('lsan-suppressions').get_abspath() - suppressions_opt = 'suppressions=%s' % suppressions_file - suppressions_opts = ':'.join([suppressions_opt, - 'print_suppressions=0']) - env['ENV']['LSAN_OPTIONS'] = suppressions_opts - print() - warning('To suppress false positive leaks, set the LSAN_OPTIONS ' - 'environment variable to "%s" when running gem5' % - suppressions_opts) - warning('LSAN_OPTIONS=%s' % suppressions_opts) - print() + if env['GCC']: + # Address sanitizer is not supported with GCC. Please see Github + # Issue https://github.com/gem5/gem5/issues/916 for more details. + warning("Address Sanitizer is not supported with GCC. " + "This option will be ignored.") + else: + # Available for llvm >= 3.1. A requirement by the build system. 
+ sanitizers.append('address') + suppressions_file = Dir('util').File('lsan-suppressions')\ + .get_abspath() + suppressions_opt = 'suppressions=%s' % suppressions_file + suppressions_opts = ':'.join([suppressions_opt, + 'print_suppressions=0']) + env['ENV']['LSAN_OPTIONS'] = suppressions_opts + print() + warning('To suppress false positive leaks, set the LSAN_OPTIONS ' + 'environment variable to "%s" when running gem5' % + suppressions_opts) + warning('LSAN_OPTIONS=%s' % suppressions_opts) + print() if sanitizers: sanitizers = ','.join(sanitizers) if env['GCC'] or env['CLANG']: diff --git a/build_opts/ALL b/build_opts/ALL index b44c7a09f7..4f4ae1b8dc 100644 --- a/build_opts/ALL +++ b/build_opts/ALL @@ -7,3 +7,4 @@ USE_POWER_ISA=y USE_RISCV_ISA=y USE_SPARC_ISA=y USE_X86_ISA=y +USE_TEST_OBJECTS=y diff --git a/build_opts/GCN3_X86 b/build_opts/GCN3_X86 deleted file mode 100644 index fd471871b6..0000000000 --- a/build_opts/GCN3_X86 +++ /dev/null @@ -1,6 +0,0 @@ -RUBY=y -RUBY_PROTOCOL_GPU_VIPER=y -BUILD_ISA=y -USE_X86_ISA=y -GCN3_GPU_ISA=y -BUILD_GPU=y diff --git a/build_tools/sim_object_param_struct_hh.py b/build_tools/sim_object_param_struct_hh.py index c82c25921c..23e10a9bfa 100644 --- a/build_tools/sim_object_param_struct_hh.py +++ b/build_tools/sim_object_param_struct_hh.py @@ -211,8 +211,7 @@ code.indent() if sim_object == SimObject: code( """ -SimObjectParams() {} -virtual ~SimObjectParams() {} +virtual ~SimObjectParams() = default; std::string name; """ diff --git a/configs/deprecated/example/se.py b/configs/deprecated/example/se.py index afdb82489d..6ad4b02b32 100644 --- a/configs/deprecated/example/se.py +++ b/configs/deprecated/example/se.py @@ -224,7 +224,7 @@ for cpu in system.cpu: if ObjectList.is_kvm_cpu(CPUClass) or ObjectList.is_kvm_cpu(FutureClass): if buildEnv["USE_X86_ISA"]: system.kvm_vm = KvmVM() - system.m5ops_base = 0xFFFF0000 + system.m5ops_base = max(0xFFFF0000, Addr(args.mem_size).getValue()) for process in multiprocesses: process.useArchPT = 
True process.kvmInSE = True diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py index f9daf8a88b..eb7c625cad 100644 --- a/configs/example/apu_se.py +++ b/configs/example/apu_se.py @@ -335,6 +335,12 @@ parser.add_argument( default="dynamic", help="register allocation policy (simple/dynamic)", ) +parser.add_argument( + "--register-file-cache-size", + type=int, + default=0, + help="number of registers in cache", +) parser.add_argument( "--dgpu", @@ -369,11 +375,33 @@ parser.add_argument( parser.add_argument( "--gfx-version", type=str, - default="gfx801", + default="gfx902", choices=GfxVersion.vals, help="Gfx version for gpuNote: gfx902 is not fully supported by ROCm", ) +parser.add_argument( + "--tcp-rp", + type=str, + default="TreePLRURP", + help="cache replacement policy" "policy for tcp", +) + +parser.add_argument( + "--tcc-rp", + type=str, + default="TreePLRURP", + help="cache replacement policy" "policy for tcc", +) + +# sqc rp both changes sqc rp and scalar cache rp +parser.add_argument( + "--sqc-rp", + type=str, + default="TreePLRURP", + help="cache replacement policy" "policy for sqc", +) + Ruby.define_options(parser) # add TLB options to the parser @@ -428,6 +456,7 @@ print( # shader is the GPU shader = Shader( n_wf=args.wfs_per_simd, + cu_per_sqc=args.cu_per_sqc, clk_domain=SrcClockDomain( clock=args.gpu_clock, voltage_domain=VoltageDomain(voltage=args.gpu_voltage), @@ -493,6 +522,7 @@ for i in range(n_cu): vrfs = [] vrf_pool_mgrs = [] srfs = [] + rfcs = [] srf_pool_mgrs = [] for j in range(args.simds_per_cu): for k in range(shader.n_wf): @@ -537,10 +567,16 @@ for i in range(n_cu): simd_id=j, wf_size=args.wf_size, num_regs=args.sreg_file_size ) ) + rfcs.append( + RegisterFileCache( + simd_id=j, cache_size=args.register_file_cache_size + ) + ) compute_units[-1].wavefronts = wavefronts compute_units[-1].vector_register_file = vrfs compute_units[-1].scalar_register_file = srfs + compute_units[-1].register_file_cache = rfcs 
compute_units[-1].register_manager = RegisterManager( policy=args.registerManagerPolicy, vrf_pool_managers=vrf_pool_mgrs, @@ -671,7 +707,7 @@ render_driver = GPURenderDriver(filename=f"dri/renderD{renderDriNum}") gpu_hsapp = HSAPacketProcessor( pioAddr=hsapp_gpu_map_paddr, numHWQueues=args.num_hw_queues ) -dispatcher = GPUDispatcher() +dispatcher = GPUDispatcher(kernel_exit_events=True) gpu_cmd_proc = GPUCommandProcessor(hsapp=gpu_hsapp, dispatcher=dispatcher) gpu_driver.device = gpu_cmd_proc shader.dispatcher = dispatcher @@ -798,6 +834,8 @@ if fast_forward: # configure the TLB hierarchy GPUTLBConfig.config_tlb_hierarchy(args, system, shader_idx) +system.exit_on_work_items = True + # create Ruby system system.piobus = IOXBar( width=32, response_latency=0, frontend_latency=0, forward_latency=0 @@ -938,19 +976,15 @@ root = Root(system=system, full_system=False) # knows what type of GPU hardware we are simulating if args.dgpu: assert args.gfx_version in [ - "gfx803", "gfx900", ], "Incorrect gfx version for dGPU" - if args.gfx_version == "gfx803": - hsaTopology.createFijiTopology(args) - elif args.gfx_version == "gfx900": + if args.gfx_version == "gfx900": hsaTopology.createVegaTopology(args) else: assert args.gfx_version in [ - "gfx801", "gfx902", ], "Incorrect gfx version for APU" - hsaTopology.createCarrizoTopology(args) + hsaTopology.createRavenTopology(args) m5.ticks.setGlobalFrequency("1THz") if args.abs_max_tick: @@ -976,6 +1010,41 @@ if args.fast_forward: exit_event = m5.simulate(maxtick) +while True: + if ( + exit_event.getCause() == "m5_exit instruction encountered" + or exit_event.getCause() == "user interrupt received" + or exit_event.getCause() == "simulate() limit reached" + or "exiting with last active thread context" in exit_event.getCause() + ): + print(f"breaking loop due to: {exit_event.getCause()}.") + break + elif "checkpoint" in exit_event.getCause(): + assert args.checkpoint_dir is not None + m5.checkpoint(args.checkpoint_dir) + print("breaking 
loop with checkpoint") + break + elif "GPU Kernel Completed" in exit_event.getCause(): + print("GPU Kernel Completed dump and reset") + m5.stats.dump() + m5.stats.reset() + elif "GPU Blit Kernel Completed" in exit_event.getCause(): + print("GPU Blit Kernel Completed dump and reset") + m5.stats.dump() + m5.stats.reset() + elif "workbegin" in exit_event.getCause(): + print("m5 work begin dump and reset") + m5.stats.dump() + m5.stats.reset() + elif "workend" in exit_event.getCause(): + print("m5 work end dump and reset") + m5.stats.dump() + m5.stats.reset() + else: + print(f"Unknown exit event: {exit_event.getCause()}. Continuing...") + + exit_event = m5.simulate(maxtick - m5.curTick()) + if args.fast_forward: if exit_event.getCause() == "a thread reached the max instruction count": m5.switchCpus(system, switch_cpu_list) diff --git a/configs/example/arm/starter_se.py b/configs/example/arm/starter_se.py index 33cf7b2f40..6d5b06b9ae 100644 --- a/configs/example/arm/starter_se.py +++ b/configs/example/arm/starter_se.py @@ -53,15 +53,24 @@ from common import ( MemConfig, ObjectList, ) -from common.cores.arm import HPI +from common.cores.arm import ( + HPI, + O3_ARM_v7a, +) # Pre-defined CPU configurations. Each tuple must be ordered as : (cpu_class, -# l1_icache_class, l1_dcache_class, walk_cache_class, l2_Cache_class). Any of +# l1_icache_class, l1_dcache_class, l2_Cache_class). Any of # the cache class may be 'None' if the particular cache is not present. 
cpu_types = { "atomic": (AtomicSimpleCPU, None, None, None), "minor": (MinorCPU, devices.L1I, devices.L1D, devices.L2), "hpi": (HPI.HPI, HPI.HPI_ICache, HPI.HPI_DCache, HPI.HPI_L2), + "o3": ( + O3_ARM_v7a.O3_ARM_v7a_3, + O3_ARM_v7a.O3_ARM_v7a_ICache, + O3_ARM_v7a.O3_ARM_v7a_DCache, + O3_ARM_v7a.O3_ARM_v7aL2, + ), } diff --git a/configs/example/cache_partitioning.py b/configs/example/cache_partitioning.py new file mode 100644 index 0000000000..9a363756e8 --- /dev/null +++ b/configs/example/cache_partitioning.py @@ -0,0 +1,201 @@ +# Copyright (c) 2024 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This script showcases the functionality of cache partitioning policies, +# containing a simple system comprised of a memory requestor (TrafficGen), +# a cache enforcing policies for requests and a SimpleMemory backing store.
+# +# Using the Way policy, the cache should show the following statistics in the +# provided configuration: +# +# | Allocated Ways | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | +# |----------------|---|-----|-----|-----|-----|-----|-----|------| +# | Cache Hits | 0 | 256 | 384 | 512 | 640 | 768 | 896 | 1024 | +# +# Using the MaxCapacity policy, expected results are the following: +# +# | Allocation % | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 | +# |--------------|----|-----|-----|-----|-----|-----|-----|-----|-----|------| +# | Cache Hits | 0 | 152 | 307 | 409 | 512 | 614 | 716 | 819 | 921 | 1024 | + +import argparse + +import m5 +from m5.objects import * + + +def capacityAllocation(capacity_str): + """ + Verify that Max Capacity partitioning policy has been provided with a suitable + configuration + """ + capacity = float(capacity_str) + + if capacity > 1 or capacity < 0: + raise argparse.ArgumentTypeError( + "Max Capacity Policy needs allocation in range [0, 1]" + ) + + return capacity + + +def wayAllocation(way_str): + """ + Verify that Way partitioning policy has been provided with a suitable + configuration + """ + way_alloc = int(way_str) + + if way_alloc < 0: + raise argparse.ArgumentTypeError( + "Way Policy needs positive number of ways" + ) + + return way_alloc + + +def generatePartPolicy(args): + """ + Generate Partitioning Policy object based on provided arguments + """ + assert args.policy in [ + "way", + "max_capacity", + ], "Only support generating way and max_capacity policies" + + if args.policy == "way": + allocated_ways = [way for way in range(0, args.way_allocation)] + allocation = WayPolicyAllocation(partition_id=0, ways=allocated_ways) + + return WayPartitioningPolicy(allocations=[allocation]) + + if args.policy == "max_capacity": + return MaxCapacityPartitioningPolicy( + partition_ids=[0], capacities=[args.capacity_allocation] + ) + + +def configSystem(): + """ + Configure base system and memory + """ + + system = System(membus=IOXBar(width=128)) 
+ system.clk_domain = SrcClockDomain( + clock="10THz", + voltage_domain=VoltageDomain(), + ) + + # Memory configuration + system.mem_ctrl = SimpleMemory(bandwidth="1GiB/s", latency="10ns") + + # add memory + system.mem_ctrl.range = AddrRange("64KiB") + system.mem_ctrl.port = system.membus.mem_side_ports + return system + + +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter +) + +parser.add_argument( + "--policy", + default="way", + choices=["way", "max_capacity"], + help="This option defines which Cache Partitioning Policy to use for " + "the system cache", +) + +parser.add_argument( + "--capacity-allocation", + type=capacityAllocation, + default=0.5, + help="The amount of the cache to partition to the default PartitionID " + "when using Max Capacity Cache Partitioning Policy in [0,1] range", +) + +parser.add_argument( + "--way-allocation", + type=wayAllocation, + default=4, + help="The number of ways in the cache to partition to the default " + "PartitionID when using Way Cache Partitioning Policy", +) + +args = parser.parse_args() + +m5.ticks.setGlobalFrequency("10THz") +system = configSystem() + +# create a cache to sit between the memory and traffic gen to enforce +# partitioning policies +part_manager = PartitionManager( + partitioning_policies=[generatePartPolicy(args)] +) +system.cache = NoncoherentCache( + size="64KiB", + assoc=8, + partitioning_manager=part_manager, + tag_latency=0, + data_latency=0, + response_latency=0, + mshrs=1, + tgts_per_mshr=8, + write_buffers=1, + replacement_policy=MRURP(), +) +system.cache.mem_side = system.membus.cpu_side_ports + +# instantiate traffic gen and connect to crossbar +system.tgen = PyTrafficGen() +system.tgen.port = system.cache.cpu_side + +# finalise config and run simulation +root = Root(full_system=False, system=system) +root.system.mem_mode = "timing" +m5.instantiate() + +# configure traffic generator to do 2x 64KiB sequential reads from address 0 +# to 65536; one to warm 
up the cache one to test cache partitioning +linear_tgen = system.tgen.createLinear( + 1000000000, 0, 65536, 64, 1, 1, 100, 65536 +) +exit_tgen = system.tgen.createExit(1) +system.tgen.start([linear_tgen, linear_tgen, exit_tgen]) + +# handle exit reporting +exit_event = m5.simulate(2000000000) +print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}") diff --git a/configs/example/gem5_library/arm-hello.py b/configs/example/gem5_library/arm-hello.py index 39583463e7..87d1f75d8c 100644 --- a/configs/example/gem5_library/arm-hello.py +++ b/configs/example/gem5_library/arm-hello.py @@ -84,7 +84,7 @@ board.set_se_binary_workload( # Any resource specified in this file will be automatically retrieved. # At the time of writing, this file is a WIP and does not contain all # resources. Jira ticket: https://gem5.atlassian.net/browse/GEM5-1096 - obtain_resource("arm-hello64-static") + obtain_resource("arm-hello64-static", resource_version="1.0.0") ) # Lastly we run the simulation. diff --git a/configs/example/gem5_library/arm-ubuntu-run-with-kvm.py b/configs/example/gem5_library/arm-ubuntu-run-with-kvm.py new file mode 100644 index 0000000000..62da70c023 --- /dev/null +++ b/configs/example/gem5_library/arm-ubuntu-run-with-kvm.py @@ -0,0 +1,143 @@ +# Copyright (c) 2022-23 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This script further shows an example of booting an ARM based full system Ubuntu +disk image. This simulation boots the disk image using 2 TIMING CPU cores. The +simulation ends when the startup is completed successfully (i.e. when an +`m5_exit instruction is reached on successful boot). 
+ +Usage +----- + +``` +scons build/ARM/gem5.opt -j +./build/ARM/gem5.opt configs/example/gem5_library/arm-ubuntu-run-with-kvm.py +``` + +""" + +from m5.objects import ( + ArmDefaultRelease, + VExpress_GEM5_V1, +) + +from gem5.coherence_protocol import CoherenceProtocol +from gem5.components.boards.arm_board import ArmBoard +from gem5.components.memory import DualChannelDDR4_2400 +from gem5.components.processors.cpu_types import CPUTypes +from gem5.components.processors.simple_switchable_processor import ( + SimpleSwitchableProcessor, +) +from gem5.isas import ISA +from gem5.resources.resource import obtain_resource +from gem5.simulate.exit_event import ExitEvent +from gem5.simulate.simulator import Simulator +from gem5.utils.requires import requires + +# This runs a check to ensure the gem5 binary is compiled for ARM. +requires(isa_required=ISA.ARM) + +from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import ( + PrivateL1PrivateL2CacheHierarchy, +) + +# Here we setup the parameters of the l1 and l2 caches. +cache_hierarchy = PrivateL1PrivateL2CacheHierarchy( + l1d_size="16kB", l1i_size="16kB", l2_size="256kB" +) + +# Memory: Dual Channel DDR4 2400 DRAM device. +memory = DualChannelDDR4_2400(size="2GB") + +# Here we setup the processor. This is a special switchable processor in which +# a starting core type and a switch core type must be specified. Once a +# configuration is instantiated a user may call `processor.switch()` to switch +# from the starting core types to the switch core types. In this simulation +# we start with KVM cores to simulate the OS boot, then switch to the Timing +# cores for the command we wish to run after boot. +processor = SimpleSwitchableProcessor( + starting_core_type=CPUTypes.KVM, + switch_core_type=CPUTypes.TIMING, + isa=ISA.ARM, + num_cores=2, +) + +# The ArmBoard requires a `release` to be specified. This adds all the +# extensions or features to the system. 
We are setting this to for_kvm() +# to enable KVM simulation. +release = ArmDefaultRelease.for_kvm() + +# The platform sets up the memory ranges of all the on-chip and off-chip +# devices present on the ARM system. ARM KVM only works with VExpress_GEM5_V1 +# on the ArmBoard at the moment. +platform = VExpress_GEM5_V1() + +# Here we setup the board. The ArmBoard allows for Full-System ARM simulations. +board = ArmBoard( + clk_freq="3GHz", + processor=processor, + memory=memory, + cache_hierarchy=cache_hierarchy, + release=release, + platform=platform, +) +# This is the command to run after the system has booted. The first `m5 exit` +# will stop the simulation so we can switch the CPU cores from KVM to timing +# and continue the simulation to run the echo command, sleep for a second, +# then, again, call `m5 exit` to terminate the simulation. After simulation +# has ended you may inspect `m5out/system.pc.com_1.device` to see the echo +# output. +command = ( + "m5 --addr=0x10010000 exit;" + + "echo 'This is running on Timing CPU cores.';" + + "m5 exit;" +) + +# Here we set a full system workload. The "arm64-ubuntu-20.04-boot" boots +# Ubuntu 20.04. We use arm64-bootloader (boot.arm64) as the bootloader to use +# ARM KVM. +board.set_kernel_disk_workload( + kernel=obtain_resource( + "arm64-linux-kernel-5.4.49", resource_version="1.0.0" + ), + disk_image=obtain_resource( + "arm64-ubuntu-20.04-img", resource_version="1.0.0" + ), + bootloader=obtain_resource("arm64-bootloader", resource_version="1.0.0"), + readfile_contents=command, +) +# We define the system with the aforementioned system defined. +simulator = Simulator( + board=board, + on_exit_event={ExitEvent.EXIT: (func() for func in [processor.switch])}, +) + +# Once the system successfully boots, it encounters an +# `m5_exit instruction encountered`. We stop the simulation then. When the +# simulation has ended you may inspect `m5out/board.terminal` to see +# the stdout. 
+simulator.run() diff --git a/configs/example/gem5_library/arm-ubuntu-run.py b/configs/example/gem5_library/arm-ubuntu-run.py index 734fb9ee1b..4c784d6f9d 100644 --- a/configs/example/gem5_library/arm-ubuntu-run.py +++ b/configs/example/gem5_library/arm-ubuntu-run.py @@ -102,7 +102,9 @@ board = ArmBoard( # Here we set a full system workload. The "arm64-ubuntu-20.04-boot" boots # Ubuntu 20.04. -board.set_workload(obtain_resource("arm64-ubuntu-20.04-boot")) +board.set_workload( + obtain_resource("arm64-ubuntu-20.04-boot", resource_version="2.0.0") +) # We define the system with the aforementioned system defined. diff --git a/configs/example/gem5_library/caches/octopi-cache-example.py b/configs/example/gem5_library/caches/octopi-cache-example.py index fa19773167..80a0c71865 100644 --- a/configs/example/gem5_library/caches/octopi-cache-example.py +++ b/configs/example/gem5_library/caches/octopi-cache-example.py @@ -97,7 +97,9 @@ board = ArmBoard( platform=platform, ) -board.set_workload(obtain_resource("arm64-ubuntu-20.04-boot")) +board.set_workload( + obtain_resource("arm64-ubuntu-20.04-boot", resource_version="2.0.0") +) simulator = Simulator(board=board) simulator.run() diff --git a/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py b/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py index aa78de5647..273ee92ae6 100644 --- a/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py +++ b/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py @@ -90,7 +90,9 @@ board = SimpleBoard( board.set_se_binary_workload( # the workload should be the same as the save-checkpoint script obtain_resource("riscv-hello"), - checkpoint=obtain_resource("riscv-hello-example-checkpoint"), + checkpoint=obtain_resource( + "riscv-hello-example-checkpoint", resource_version="3.0.0" + ), ) simulator = Simulator( diff --git a/configs/example/gem5_library/checkpoints/riscv-hello-save-checkpoint.py 
b/configs/example/gem5_library/checkpoints/riscv-hello-save-checkpoint.py index b024a3a44a..c231203d56 100644 --- a/configs/example/gem5_library/checkpoints/riscv-hello-save-checkpoint.py +++ b/configs/example/gem5_library/checkpoints/riscv-hello-save-checkpoint.py @@ -107,8 +107,8 @@ board.set_se_binary_workload( # Lastly we run the simulation. max_ticks = 10**6 -simulator = Simulator(board=board, full_system=False) -simulator.run(max_ticks=max_ticks) +simulator = Simulator(board=board, full_system=False, max_ticks=max_ticks) +simulator.run() print( "Exiting @ tick {} because {}.".format( diff --git a/configs/example/gem5_library/checkpoints/simpoints-se-restore.py b/configs/example/gem5_library/checkpoints/simpoints-se-restore.py index 284289be6f..a396869df3 100644 --- a/configs/example/gem5_library/checkpoints/simpoints-se-restore.py +++ b/configs/example/gem5_library/checkpoints/simpoints-se-restore.py @@ -60,8 +60,8 @@ from m5.stats import ( ) from gem5.components.boards.simple_board import SimpleBoard -from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import ( - PrivateL1PrivateL2CacheHierarchy, +from gem5.components.cachehierarchies.classic.private_l1_private_l2_walk_cache_hierarchy import ( + PrivateL1PrivateL2WalkCacheHierarchy, ) from gem5.components.memory import DualChannelDDR4_2400 from gem5.components.processors.cpu_types import CPUTypes @@ -80,7 +80,7 @@ requires(isa_required=ISA.X86) # The cache hierarchy can be different from the cache hierarchy used in taking # the checkpoints -cache_hierarchy = PrivateL1PrivateL2CacheHierarchy( +cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy( l1d_size="32kB", l1i_size="32kB", l2_size="256kB", @@ -125,7 +125,9 @@ board.set_se_simpoint_workload( weight_list=[0.1, 0.2, 0.4, 0.3], warmup_interval=1000000, ), - checkpoint=obtain_resource("simpoints-se-checkpoints-v23-0-v1"), + checkpoint=obtain_resource( + "simpoints-se-checkpoints", resource_version="3.0.0" + ), ) diff --git 
a/configs/example/gem5_library/dramsys/arm-hello-dramsys.py b/configs/example/gem5_library/dramsys/arm-hello-dramsys.py index 2561f98fae..3a88d4ce9a 100644 --- a/configs/example/gem5_library/dramsys/arm-hello-dramsys.py +++ b/configs/example/gem5_library/dramsys/arm-hello-dramsys.py @@ -78,7 +78,7 @@ board.set_se_binary_workload( # Any resource specified in this file will be automatically retrieved. # At the time of writing, this file is a WIP and does not contain all # resources. Jira ticket: https://gem5.atlassian.net/browse/GEM5-1096 - obtain_resource("arm-hello64-static") + obtain_resource("arm-hello64-static", resource_version="1.0.0") ) # Lastly we run the simulation. diff --git a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py index 781b2f7281..a97ea39d17 100644 --- a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py +++ b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py @@ -48,8 +48,8 @@ from m5.stats import ( ) from gem5.components.boards.simple_board import SimpleBoard -from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import ( - PrivateL1PrivateL2CacheHierarchy, +from gem5.components.cachehierarchies.classic.private_l1_private_l2_walk_cache_hierarchy import ( + PrivateL1PrivateL2WalkCacheHierarchy, ) from gem5.components.memory import DualChannelDDR4_2400 from gem5.components.processors.cpu_types import CPUTypes @@ -90,7 +90,7 @@ args = parser.parse_args() # The cache hierarchy can be different from the cache hierarchy used in taking # the checkpoints -cache_hierarchy = PrivateL1PrivateL2CacheHierarchy( +cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy( l1d_size="32kB", l1i_size="32kB", l2_size="256kB", diff --git a/configs/example/gem5_library/multisim/multisim-fs-x86-npb.py b/configs/example/gem5_library/multisim/multisim-fs-x86-npb.py new file mode 100644 index 
0000000000..eff2b0c48f --- /dev/null +++ b/configs/example/gem5_library/multisim/multisim-fs-x86-npb.py @@ -0,0 +1,138 @@ +# Copyright (c) 2024 The Regents of the University of California. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""An example of a single configuration script for defining multiple +simulations through the gem5 `multisim` module. + +This script creates 6 full system simulations by iterating through a suite +of benchmarks and different core counts. + +Usage +----- + +1.
To run all the simulations defined in this script:: + +```shell + -m gem5.utils.multisim \ + configs/example/gem5_library/multisim/multisim-fs-x86-npb.py +``` + +2. To run a specific simulation defined in this script: + +```shell + configs/example/gem5_library/multisim/multisim-fs-x86-npb.py \ + # e.g. npb-bt-a_cores-1 +``` + +3. To list all the IDs of the simulations defined in this script: + +```shell + configs/example/gem5_library/multisim/multisim-fs-x86-npb.py -l +``` +""" + +import m5 + +import gem5.utils.multisim as multisim +from gem5.coherence_protocol import CoherenceProtocol +from gem5.components.boards.x86_board import X86Board +from gem5.components.memory import DualChannelDDR4_2400 +from gem5.components.processors.cpu_types import CPUTypes +from gem5.components.processors.simple_switchable_processor import ( + SimpleSwitchableProcessor, +) +from gem5.isas import ISA +from gem5.resources.resource import obtain_resource +from gem5.simulate.simulator import ( + ExitEvent, + Simulator, +) +from gem5.utils.requires import requires + +requires( + isa_required=ISA.X86, + coherence_protocol_required=CoherenceProtocol.MESI_TWO_LEVEL, +) + +from gem5.components.cachehierarchies.ruby.mesi_two_level_cache_hierarchy import ( + MESITwoLevelCacheHierarchy, +) + + +def handle_workbegin(): + m5.stats.reset() + processor.switch() + yield False + + +def handle_workend(): + m5.stats.dump() + yield True + + +# Set the maximum number of concurrent processes to be 3. +multisim.set_num_processes(3) + +# Here we imagine an experiment wanting to run each NPB benchmark on the same +# system twice: once with 1 core and once with 2 cores. 
+for benchmark in obtain_resource("npb-benchmark-suite"): + for num_cores in [1, 2]: + cache_hierarchy = MESITwoLevelCacheHierarchy( + l1d_size="32kB", + l1i_size="32kB", + l2_size="256kB", + l1d_assoc=8, + l1i_assoc=8, + l2_assoc=16, + num_l2_banks=2, + ) + memory = DualChannelDDR4_2400(size="3GB") + processor = SimpleSwitchableProcessor( + starting_core_type=CPUTypes.ATOMIC, + switch_core_type=CPUTypes.TIMING, + isa=ISA.X86, + num_cores=num_cores, + ) + board = X86Board( + clk_freq="3GHz", + processor=processor, + memory=memory, + cache_hierarchy=cache_hierarchy, + ) + + board.set_workload(benchmark) + + simulator = Simulator( + board=board, + on_exit_event={ + ExitEvent.WORKBEGIN: handle_workbegin(), + ExitEvent.WORKEND: handle_workend(), + }, + ) + + simulator.set_id(f"{benchmark.get_id()}_cores-{num_cores}") + + multisim.add_simulator(simulator) diff --git a/configs/example/gem5_library/multisim/multisim-print-this.py b/configs/example/gem5_library/multisim/multisim-print-this.py new file mode 100644 index 0000000000..bd724a5d92 --- /dev/null +++ b/configs/example/gem5_library/multisim/multisim-print-this.py @@ -0,0 +1,87 @@ +# Copyright (c) 2024 The Regents of the University of California +# All Rights Reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""An example of a single configuration script for defining multiple +simulations through the gem5 `multisim` module. + +This script is very simple and simply prints a simple message once for each +simulation, outputting the process id. + +Usage +----- + +1. To run all the simulations defined in this script:: + +```shell + -m gem5.utils.multisim \ + configs/example/gem5_library/multisim/multisim-print-this.py +``` + +2. To run a specific simulation defined in this script: + +```shell + configs/example/gem5_library/multisim/multisim-print-this.py \ + process_id_1 +``` + +3.
To list all the IDs of the simulations defined in this script: + +```shell + configs/example/gem5_library/multisim/multisim-print-this.py -l +``` +""" + + +import gem5.utils.multisim as multisim +from gem5.components.boards.simple_board import SimpleBoard +from gem5.components.cachehierarchies.classic.no_cache import NoCache +from gem5.components.memory import SingleChannelDDR3_1600 +from gem5.components.processors.cpu_types import CPUTypes +from gem5.components.processors.simple_processor import SimpleProcessor +from gem5.isas import ISA +from gem5.resources.resource import obtain_resource +from gem5.simulate.simulator import Simulator + +# Set the maximum number of concurrent processes to be 2. +multisim.set_num_processes(2) + +for process_id in range(5): + cache_hierarchy = NoCache() + memory = SingleChannelDDR3_1600(size="32MB") + processor = SimpleProcessor( + cpu_type=CPUTypes.TIMING, isa=ISA.X86, num_cores=1 + ) + board = SimpleBoard( + clk_freq="1GHz", + processor=processor, + memory=memory, + cache_hierarchy=cache_hierarchy, + ) + board.set_se_binary_workload( + binary=obtain_resource("x86-print-this"), + arguments=[f"Hello from process {process_id}", 1], + ) + multisim.add_simulator(Simulator(board=board, id=f"process_{process_id}")) diff --git a/configs/example/gem5_library/power-hello.py b/configs/example/gem5_library/power-hello.py index 8a73b6a201..69106baace 100644 --- a/configs/example/gem5_library/power-hello.py +++ b/configs/example/gem5_library/power-hello.py @@ -75,7 +75,9 @@ board = SimpleBoard( cache_hierarchy=cache_hierarchy, ) -board.set_se_binary_workload(obtain_resource("power-hello")) +board.set_se_binary_workload( + obtain_resource("power-hello", resource_version="1.0.0") +) # Lastly we run the simulation. 
simulator = Simulator(board=board) diff --git a/configs/example/gem5_library/riscv-fs.py b/configs/example/gem5_library/riscv-fs.py index 914d9a7023..5f37c259ed 100644 --- a/configs/example/gem5_library/riscv-fs.py +++ b/configs/example/gem5_library/riscv-fs.py @@ -40,8 +40,8 @@ Characteristics """ from gem5.components.boards.riscv_board import RiscvBoard -from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import ( - PrivateL1PrivateL2CacheHierarchy, +from gem5.components.cachehierarchies.classic.private_l1_private_l2_walk_cache_hierarchy import ( + PrivateL1PrivateL2WalkCacheHierarchy, ) from gem5.components.memory import SingleChannelDDR3_1600 from gem5.components.processors.cpu_types import CPUTypes @@ -57,7 +57,7 @@ requires(isa_required=ISA.RISCV) # Setup the cache hierarchy. # For classic, PrivateL1PrivateL2 and NoCache have been tested. # For Ruby, MESI_Two_Level and MI_example have been tested. -cache_hierarchy = PrivateL1PrivateL2CacheHierarchy( +cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy( l1d_size="32KiB", l1i_size="32KiB", l2_size="512KiB" ) @@ -79,8 +79,10 @@ board = RiscvBoard( # Set the Full System workload. board.set_kernel_disk_workload( - kernel=obtain_resource("riscv-bootloader-vmlinux-5.10"), - disk_image=obtain_resource("riscv-disk-img"), + kernel=obtain_resource( + "riscv-bootloader-vmlinux-5.10", resource_version="1.0.0" + ), + disk_image=obtain_resource("riscv-disk-img", resource_version="1.0.0"), ) simulator = Simulator(board=board) diff --git a/configs/example/gem5_library/riscv-ubuntu-run.py b/configs/example/gem5_library/riscv-ubuntu-run.py index 1d31b055de..c236b69169 100644 --- a/configs/example/gem5_library/riscv-ubuntu-run.py +++ b/configs/example/gem5_library/riscv-ubuntu-run.py @@ -57,12 +57,12 @@ from gem5.utils.requires import requires requires(isa_required=ISA.RISCV) # With RISCV, we use simple caches. 
-from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import ( - PrivateL1PrivateL2CacheHierarchy, +from gem5.components.cachehierarchies.classic.private_l1_private_l2_walk_cache_hierarchy import ( + PrivateL1PrivateL2WalkCacheHierarchy, ) # Here we setup the parameters of the l1 and l2 caches. -cache_hierarchy = PrivateL1PrivateL2CacheHierarchy( +cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy( l1d_size="16kB", l1i_size="16kB", l2_size="256kB" ) @@ -88,7 +88,9 @@ board = RiscvBoard( # Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit` # instruction which stops the simulation. When the simulation has ended you may # inspect `m5out/system.pc.com_1.device` to see the stdout. -board.set_workload(obtain_resource("riscv-ubuntu-20.04-boot")) +board.set_workload( + obtain_resource("riscv-ubuntu-20.04-boot", resource_version="3.0.0") +) simulator = Simulator(board=board) simulator.run() diff --git a/configs/example/gem5_library/riscvmatched-fs.py b/configs/example/gem5_library/riscvmatched-fs.py index ad045cac3d..34faed0b2c 100644 --- a/configs/example/gem5_library/riscvmatched-fs.py +++ b/configs/example/gem5_library/riscvmatched-fs.py @@ -76,7 +76,7 @@ board = RISCVMatchedBoard( # In the case where the `-i` flag is passed, we add the kernel argument # `init=/root/exit.sh`. This means the simulation will exit after the Linux # Kernel has booted. 
-workload = obtain_resource("riscv-ubuntu-20.04-boot") +workload = obtain_resource("riscv-ubuntu-20.04-boot", resource_version="3.0.0") kernel_args = board.get_default_kernel_args() if args.to_init: kernel_args.append("init=/root/exit.sh") diff --git a/configs/example/gem5_library/riscvmatched-hello.py b/configs/example/gem5_library/riscvmatched-hello.py index 3ea13b4851..f95bb051e9 100644 --- a/configs/example/gem5_library/riscvmatched-hello.py +++ b/configs/example/gem5_library/riscvmatched-hello.py @@ -49,7 +49,9 @@ requires(isa_required=ISA.RISCV) board = RISCVMatchedBoard() # set the hello world riscv binary as the board workload -board.set_se_binary_workload(obtain_resource("riscv-hello")) +board.set_se_binary_workload( + obtain_resource("riscv-hello", resource_version="1.0.0") +) # run the simulation with the RISCV Matched board simulator = Simulator(board=board, full_system=False) diff --git a/configs/example/gem5_library/riscvmatched-microbenchmark-suite.py b/configs/example/gem5_library/riscvmatched-microbenchmark-suite.py index 2024bdddf0..01a274b39d 100644 --- a/configs/example/gem5_library/riscvmatched-microbenchmark-suite.py +++ b/configs/example/gem5_library/riscvmatched-microbenchmark-suite.py @@ -45,7 +45,9 @@ requires(isa_required=ISA.RISCV) board = RISCVMatchedBoard() # obtain the RISC-V Vertical Microbenchmarks -microbenchmarks = obtain_resource("riscv-vertical-microbenchmarks") +microbenchmarks = obtain_resource( + "riscv-vertical-microbenchmarks", resource_version="1.0.0" +) # list all the microbenchmarks present in the suite print("Microbenchmarks present in the suite:") diff --git a/configs/example/gem5_library/spatter_gen/spatter-gen-test.py b/configs/example/gem5_library/spatter_gen/spatter-gen-test.py new file mode 100644 index 0000000000..ef0cc04aa5 --- /dev/null +++ b/configs/example/gem5_library/spatter_gen/spatter-gen-test.py @@ -0,0 +1,97 @@ +# Copyright (c) 2024 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Script that runs a SpatterGen test with a specific trace file. +This script can be used as an example on how to use SpatterGenerator, +SpatterKernel, and its utilities to run a Spatter trace in gem5. + +The script uses a spatter trace taken from the hpcgarage github repository. 
+Link to the original trace file: + +https://github.com/hpcgarage/spatter/blob/main/standard-suite/app-traces/amg.json + +It will create a system with `num_cores` SpatterGenerators and interleave the +trace by `intlv_size` elements in the `pattern` field from the trace. +Interleaving is done for assigning part of the access to each core. + +Usage: +------ + +``` +scons build/NULL/gem5.opt +./build/NULL/gem5.opt configs/example/gem5_library/spatter_gen/spatter-gen-test.py +``` +""" +import argparse +import json +from pathlib import Path + +import m5 +from m5.objects import Root + +from gem5.components.boards.test_board import TestBoard +from gem5.components.cachehierarchies.classic.private_l1_cache_hierarchy import ( + PrivateL1CacheHierarchy, +) +from gem5.components.memory import DualChannelDDR4_2400 +from gem5.components.processors.spatter_gen import ( + SpatterGenerator, + prepare_kernels, +) +from gem5.simulate.simulator import Simulator + +num_cores = 8 +intlv_size = 128 + +memory = DualChannelDDR4_2400(size="8GiB") + +generator = SpatterGenerator( + processing_mode="synchronous", num_cores=num_cores +) + +kernels = prepare_kernels( + Path(__file__).parent / "traces/amg.json", + num_cores, + intlv_size, + 0, + memory.get_size() // 2, +) +for kernel in kernels: + generator.add_kernel(kernel) + +board = TestBoard( + clk_freq="4GHz", + generator=generator, + cache_hierarchy=PrivateL1CacheHierarchy( + l1d_size="32KiB", l1i_size="32KiB" + ), + memory=memory, +) + +simulator = Simulator(board=board, full_system=False) + +simulator.run() diff --git a/configs/example/gem5_library/spatter_gen/traces/amg.json b/configs/example/gem5_library/spatter_gen/traces/amg.json new file mode 100644 index 0000000000..64da33a2e1 --- /dev/null +++ b/configs/example/gem5_library/spatter_gen/traces/amg.json @@ -0,0 +1 @@ +[{"delta": 1, "kernel": "Gather", "pattern": [1333, 0, 1, 2, 36, 37, 38, 72, 73, 74, 1296, 1297, 1298, 1332, 1334, 1368], "count": 1454647}, {"delta": 1, "kernel": 
"Gather", "pattern": [1333, 0, 1, 36, 37, 72, 73, 1296, 1297, 1332, 1368, 1369, 2592, 2593, 2628, 2629], "count": 1454647}] diff --git a/configs/example/gem5_library/x86-gapbs-benchmarks.py b/configs/example/gem5_library/x86-gapbs-benchmarks.py index 4ef6f52b9c..9864deaae9 100644 --- a/configs/example/gem5_library/x86-gapbs-benchmarks.py +++ b/configs/example/gem5_library/x86-gapbs-benchmarks.py @@ -77,7 +77,9 @@ parser = argparse.ArgumentParser( description="An example configuration script to run the gapbs benchmarks." ) -gapbs_suite = obtain_resource("gapbs-benchmark-suite") +gapbs_suite = obtain_resource( + "gapbs-benchmark-suite", resource_version="1.0.0" +) # The only positional argument accepted is the benchmark name in this script. diff --git a/configs/example/gem5_library/x86-npb-benchmarks.py b/configs/example/gem5_library/x86-npb-benchmarks.py index 6e6d501c37..a578522a4a 100644 --- a/configs/example/gem5_library/x86-npb-benchmarks.py +++ b/configs/example/gem5_library/x86-npb-benchmarks.py @@ -88,7 +88,7 @@ parser = argparse.ArgumentParser( description="An example configuration script to run the npb benchmarks." ) -npb_suite = obtain_resource("npb-benchmark-suite") +npb_suite = obtain_resource("npb-benchmark-suite", resource_version="1.0.0") # The only positional argument accepted is the benchmark name in this script. parser.add_argument( diff --git a/configs/example/gem5_library/x86-parsec-benchmarks.py b/configs/example/gem5_library/x86-parsec-benchmarks.py index 71cfd4a9ef..36f56c4b95 100644 --- a/configs/example/gem5_library/x86-parsec-benchmarks.py +++ b/configs/example/gem5_library/x86-parsec-benchmarks.py @@ -185,10 +185,12 @@ board.set_kernel_disk_workload( # The x86 linux kernel will be automatically downloaded to the # `~/.cache/gem5` directory if not already present. 
# PARSEC benchamarks were tested with kernel version 4.19.83 - kernel=obtain_resource("x86-linux-kernel-4.19.83"), + kernel=obtain_resource( + "x86-linux-kernel-4.19.83", resource_version="1.0.0" + ), # The x86-parsec image will be automatically downloaded to the # `~/.cache/gem5` directory if not already present. - disk_image=obtain_resource("x86-parsec"), + disk_image=obtain_resource("x86-parsec", resource_version="1.0.0"), readfile_contents=command, ) diff --git a/configs/example/gem5_library/x86-ubuntu-run-with-kvm-no-perf.py b/configs/example/gem5_library/x86-ubuntu-run-with-kvm-no-perf.py index 632b409b16..d96ff80a3c 100644 --- a/configs/example/gem5_library/x86-ubuntu-run-with-kvm-no-perf.py +++ b/configs/example/gem5_library/x86-ubuntu-run-with-kvm-no-perf.py @@ -121,7 +121,7 @@ command = ( + "m5 exit;" ) -workload = obtain_resource("x86-ubuntu-18.04-boot") +workload = obtain_resource("x86-ubuntu-18.04-boot", resource_version="2.0.0") workload.set_parameter("readfile_contents", command) board.set_workload(workload) diff --git a/configs/example/gem5_library/x86-ubuntu-run-with-kvm.py b/configs/example/gem5_library/x86-ubuntu-run-with-kvm.py index ec361dcd6e..b9d035757c 100644 --- a/configs/example/gem5_library/x86-ubuntu-run-with-kvm.py +++ b/configs/example/gem5_library/x86-ubuntu-run-with-kvm.py @@ -117,7 +117,7 @@ command = ( + "m5 exit;" ) -workload = obtain_resource("x86-ubuntu-18.04-boot") +workload = obtain_resource("x86-ubuntu-18.04-boot", resource_version="2.0.0") workload.set_parameter("readfile_contents", command) board.set_workload(workload) diff --git a/configs/example/gem5_library/x86-ubuntu-run.py b/configs/example/gem5_library/x86-ubuntu-run.py index 3b7b754b90..a8737d7297 100644 --- a/configs/example/gem5_library/x86-ubuntu-run.py +++ b/configs/example/gem5_library/x86-ubuntu-run.py @@ -55,7 +55,9 @@ board = X86DemoBoard() # We then set the workload. Here we use the "x86-ubuntu-18.04-boot" workload. 
# This boots Ubuntu 18.04 with Linux 5.4.49. If the required resources are not # found locally, they will be downloaded. -board.set_workload(obtain_resource("x86-ubuntu-18.04-boot")) +board.set_workload( + obtain_resource("x86-ubuntu-18.04-boot", resource_version="2.0.0") +) simulator = Simulator(board=board) simulator.run() diff --git a/configs/example/gpufs/Disjoint_VIPER.py b/configs/example/gpufs/Disjoint_VIPER.py index 28f0768c2a..0fd258e0fd 100644 --- a/configs/example/gpufs/Disjoint_VIPER.py +++ b/configs/example/gpufs/Disjoint_VIPER.py @@ -58,6 +58,8 @@ class Disjoint_VIPER(RubySystem): self.network_cpu = DisjointSimple(self) self.network_gpu = DisjointSimple(self) + self.block_size_bytes = options.cacheline_size + # Construct CPU controllers cpu_dir_nodes = construct_dirs(options, system, self, self.network_cpu) (cp_sequencers, cp_cntrl_nodes) = construct_corepairs( diff --git a/configs/example/gpufs/amd/AmdGPUOptions.py b/configs/example/gpufs/amd/AmdGPUOptions.py index 3d6a8cc48e..9996d33a2e 100644 --- a/configs/example/gpufs/amd/AmdGPUOptions.py +++ b/configs/example/gpufs/amd/AmdGPUOptions.py @@ -247,3 +247,9 @@ def addAmdGPUOptions(parser): default="simple", help="register allocation policy (simple/dynamic)", ) + parser.add_argument( + "--register-file-cache-size", + type=int, + default=0, + help="number of registers in cache", + ) diff --git a/configs/example/gpufs/mi200.py b/configs/example/gpufs/mi200.py new file mode 100644 index 0000000000..cc4f5df787 --- /dev/null +++ b/configs/example/gpufs/mi200.py @@ -0,0 +1,159 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +import argparse +import base64 +import os +import sys +import tempfile + +import runfs +from amd import AmdGPUOptions +from common import ( + GPUTLBOptions, + Options, +) +from ruby import Ruby + +import m5 + +demo_runscript_without_checkpoint = """\ +export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH +export HSA_ENABLE_INTERRUPT=0 +export HCC_AMDGPU_TARGET=gfx90a +free -m +dmesg -n8 +dd if=/root/roms/mi200.rom of=/dev/mem bs=1k seek=768 count=128 +if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then + echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5." 
+ /sbin/m5 exit +fi +modprobe -v amdgpu ip_block_mask=0x6f ppfeaturemask=0 dpm=0 audio=0 ras_enable=0 +echo "Running {} {}" +echo "{}" | base64 -d > myapp +chmod +x myapp +./myapp {} +/sbin/m5 exit +""" + +demo_runscript_with_checkpoint = """\ +export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH +export HSA_ENABLE_INTERRUPT=0 +export HCC_AMDGPU_TARGET=gfx90a +dmesg -n8 +dd if=/root/roms/mi200.rom of=/dev/mem bs=1k seek=768 count=128 +if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then + echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5." + /sbin/m5 exit +fi +modprobe -v amdgpu ip_block_mask=0x6f ppfeaturemask=0 dpm=0 audio=0 ras_enable=0 +echo "Running {} {}" +echo "{}" | base64 -d > myapp +chmod +x myapp +/sbin/m5 checkpoint +./myapp {} +/sbin/m5 exit +""" + + +def addDemoOptions(parser): + parser.add_argument( + "-a", "--app", default=None, help="GPU application to run" + ) + parser.add_argument( + "-o", "--opts", default="", help="GPU application arguments" + ) + + +def runMI200GPUFS(cpu_type): + parser = argparse.ArgumentParser() + runfs.addRunFSOptions(parser) + Options.addCommonOptions(parser) + AmdGPUOptions.addAmdGPUOptions(parser) + Ruby.define_options(parser) + GPUTLBOptions.tlb_options(parser) + addDemoOptions(parser) + + # Parse now so we can override options + args = parser.parse_args() + demo_runscript = "" + + # Create temp script to run application + if args.app is None: + print(f"No application given. Use {sys.argv[0]} -a ") + sys.exit(1) + elif args.kernel is None: + print(f"No kernel path given. Use {sys.argv[0]} --kernel ") + sys.exit(1) + elif args.disk_image is None: + print(f"No disk path given. 
Use {sys.argv[0]} --disk-image ") + sys.exit(1) + elif not os.path.isfile(args.app): + print("Could not find application", args.app) + sys.exit(1) + + # Choose runscript based on whether any checkpointing args are set + if args.checkpoint_dir is not None: + demo_runscript = demo_runscript_with_checkpoint + else: + demo_runscript = demo_runscript_without_checkpoint + + with open(os.path.abspath(args.app), "rb") as binfile: + encodedBin = base64.b64encode(binfile.read()).decode() + + _, tempRunscript = tempfile.mkstemp() + with open(tempRunscript, "w") as b64file: + runscriptStr = demo_runscript.format( + args.app, args.opts, encodedBin, args.opts + ) + b64file.write(runscriptStr) + + if args.second_disk == None: + args.second_disk = args.disk_image + + # Defaults for MI200 + args.ruby = True + args.cpu_type = "X86KvmCPU" + args.mem_size = "8GB" # CPU host memory + args.dgpu = True + args.dgpu_mem_size = "16GB" # GPU device memory + args.dgpu_start = "0GB" + args.checkpoint_restore = 0 + args.disjoint = True + args.timing_gpu = True + args.script = tempRunscript + args.dgpu_xor_low_bit = 0 + args.gpu_device = "MI200" + + # Run gem5 + runfs.runGpuFSSystem(args) + + +if __name__ == "__m5_main__": + runMI200GPUFS("X86KvmCPU") diff --git a/configs/example/gpufs/mi300.py b/configs/example/gpufs/mi300.py new file mode 100644 index 0000000000..9e0e0da622 --- /dev/null +++ b/configs/example/gpufs/mi300.py @@ -0,0 +1,172 @@ +# Copyright (c) 2024 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +""" This file creates an X86 system with a KVM CPU and GPU device capable of +running the MI300 ISA (gfx942). Most of this file sets up a runscript which +will load in a binary, shell script, or python file from the host and run that +within gem5. Jump to line 146 for list of system parameters to configure. 
+""" + +import argparse +import base64 +import os +import sys +import tempfile +from typing import Optional + +import runfs +from amd import AmdGPUOptions +from common import ( + GPUTLBOptions, + Options, +) +from ruby import Ruby + +import m5 + +from gem5.resources.resource import AbstractResource + +demo_runscript_without_checkpoint = """\ +export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH +export HSA_ENABLE_INTERRUPT=0 +export HCC_AMDGPU_TARGET=gfx942 +export HSA_OVERRIDE_GFX_VERSION="9.4.2" +dmesg -n8 +cat /proc/cpuinfo +dd if=/root/roms/mi200.rom of=/dev/mem bs=1k seek=768 count=128 +if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then + echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5." + /sbin/m5 exit +fi +modprobe -v amdgpu ip_block_mask=0x6f ppfeaturemask=0 dpm=0 audio=0 ras_enable=0 +echo "Running {} {}" +echo "{}" | base64 -d > myapp +chmod +x myapp +./myapp {} +/sbin/m5 exit +""" + +demo_runscript_with_checkpoint = """\ +export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH +export HSA_ENABLE_INTERRUPT=0 +export HCC_AMDGPU_TARGET=gfx942 +export HSA_OVERRIDE_GFX_VERSION="9.4.2" +dmesg -n8 +dd if=/root/roms/mi200.rom of=/dev/mem bs=1k seek=768 count=128 +if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then + echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5." 
+ /sbin/m5 exit +fi +modprobe -v amdgpu ip_block_mask=0x6f ppfeaturemask=0 dpm=0 audio=0 ras_enable=0 +echo "Running {} {}" +echo "{}" | base64 -d > myapp +chmod +x myapp +/sbin/m5 checkpoint +./myapp {} +/sbin/m5 exit +""" + + +def addDemoOptions(parser): + parser.add_argument( + "-a", "--app", default=None, help="GPU application to run" + ) + parser.add_argument( + "-o", "--opts", default="", help="GPU application arguments" + ) + + +def runMI300GPUFS( + cpu_type, + disk: Optional[AbstractResource] = None, + kernel: Optional[AbstractResource] = None, + app: Optional[AbstractResource] = None, +): + parser = argparse.ArgumentParser() + runfs.addRunFSOptions(parser) + Options.addCommonOptions(parser) + AmdGPUOptions.addAmdGPUOptions(parser) + Ruby.define_options(parser) + GPUTLBOptions.tlb_options(parser) + addDemoOptions(parser) + + # Parse now so we can override options + args = parser.parse_args() + demo_runscript = "" + + if disk != None: + args.disk_image = disk.get_local_path() + if kernel != None: + args.kernel = kernel.get_local_path() + if app != None: + args.app = app.get_local_path() + + # Create temp script to run application + if not os.path.isfile(args.app): + print("Could not find application", args.app) + sys.exit(1) + + # Choose runscript based on whether any checkpointing args are set + if args.checkpoint_dir is not None: + demo_runscript = demo_runscript_with_checkpoint + else: + demo_runscript = demo_runscript_without_checkpoint + + with open(os.path.abspath(args.app), "rb") as binfile: + encodedBin = base64.b64encode(binfile.read()).decode() + + _, tempRunscript = tempfile.mkstemp() + with open(tempRunscript, "w") as b64file: + runscriptStr = demo_runscript.format( + args.app, args.opts, encodedBin, args.opts + ) + b64file.write(runscriptStr) + + args.script = tempRunscript + + # Defaults for CPU + args.cpu_type = "X86KvmCPU" + args.mem_size = "8GB" + + # Defaults for MI300X + args.gpu_device = "MI300X" + args.dgpu_mem_size = "16GB" # GPU memory 
size, must be 16GB currently. + + # See: https://rocm.docs.amd.com/en/latest/conceptual/gpu-arch/mi300.html + # Topology for one XCD. Number of CUs is approximately 304 / 8, rounded + # up to 40 due to gem5 restriction of 4 CUs per SQC / scalar cache. + args.num_compute_units = 40 + args.gpu_topology = "Crossbar" + + # Run gem5 + runfs.runGpuFSSystem(args) + + +if __name__ == "__m5_main__": + runMI300GPUFS("X86KvmCPU") diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py index 9dcc1187f3..2220c33df5 100644 --- a/configs/example/gpufs/runfs.py +++ b/configs/example/gpufs/runfs.py @@ -134,23 +134,41 @@ def addRunFSOptions(parser): parser.add_argument( "--gpu-device", default="Vega10", - choices=["Vega10", "MI100", "MI200"], - help="GPU model to run: Vega10 (gfx900), MI100 (gfx908), or " - "MI200 (gfx90a)", + choices=["Vega10", "MI100", "MI200", "MI300X"], + help="GPU model to run: Vega10 (gfx900), MI100 (gfx908), MI200 " + "(gfx90a), or MI300X (gfx942).", ) parser.add_argument( - "--debug-at-gpu-kernel", + "--debug-at-gpu-task", type=int, default=-1, - help="Turn on debug flags starting with this kernel", + help="Turn on debug flags starting with this task (counting both blit" + " and non-blit kernels)", ) parser.add_argument( - "--exit-at-gpu-kernel", + "--exit-at-gpu-task", type=int, default=-1, - help="Exit simulation after running this many kernels", + help="Exit simulation after running this many tasks (counting both " + "blit and non-blit kernels)", + ) + + parser.add_argument( + "--exit-after-gpu-kernel", + type=int, + default=-1, + help="Exit simulation after completing this (non-blit) kernel", + ) + + parser.add_argument( + "--skip-until-gpu-kernel", + type=int, + default=0, + help="Skip (non-blit) kernels until reaching this kernel. 
Note that " + "this can impact correctness (the skipped kernels are completely " + "skipped, not fast forwarded)", ) parser.add_argument( @@ -177,6 +195,28 @@ def addRunFSOptions(parser): help="Disable KVM perf counters (use this with LSF / ETX)", ) + parser.add_argument( + "--tcp-rp", + type=str, + default="TreePLRURP", + help="cache replacement policy for tcp", + ) + + parser.add_argument( + "--tcc-rp", + type=str, + default="TreePLRURP", + help="cache replacement policy for tcc", + ) + + # sqc rp both changes sqc rp and scalar cache rp + parser.add_argument( + "--sqc-rp", + type=str, + default="TreePLRURP", + help="cache replacement policy for sqc", + ) + def runGpuFSSystem(args): """ @@ -230,8 +270,9 @@ def runGpuFSSystem(args): print("Running the simulation") sim_ticks = args.abs_max_tick - kernels_launched = 0 - if args.debug_at_gpu_kernel != -1: + kernels_completed = 0 + tasks_completed = 0 + if args.debug_at_gpu_task != -1: m5.trace.disable() exit_event = m5.simulate(sim_ticks) @@ -249,16 +290,27 @@ def runGpuFSSystem(args): m5.checkpoint(args.checkpoint_dir) break elif "GPU Kernel Completed" in exit_event.getCause(): - kernels_launched += 1 + if kernels_completed == args.exit_after_gpu_kernel: + print(f"Exiting after GPU kernel {kernels_completed}") + break + kernels_completed += 1 + tasks_completed += 1 + elif "GPU Blit Kernel Completed" in exit_event.getCause(): + tasks_completed += 1 + elif "Skipping GPU Kernel" in exit_event.getCause(): + print(f"Skipping GPU kernel {kernels_completed}") + kernels_completed += 1 + tasks_completed += 1 else: print( f"Unknown exit event: {exit_event.getCause()}. Continuing..." 
) - if kernels_launched == args.debug_at_gpu_kernel: + if tasks_completed == args.debug_at_gpu_task: + print(f"Enabling debug flags @ GPU task {tasks_completed}") m5.trace.enable() - if kernels_launched == args.exit_at_gpu_kernel: - print(f"Exiting @ GPU kernel {kernels_launched}") + if tasks_completed == args.exit_at_gpu_task: + print(f"Exiting @ GPU task {tasks_completed}") break exit_event = m5.simulate(sim_ticks - m5.curTick()) diff --git a/configs/example/gpufs/system/amdgpu.py b/configs/example/gpufs/system/amdgpu.py index 4bca52c77e..55937cd255 100644 --- a/configs/example/gpufs/system/amdgpu.py +++ b/configs/example/gpufs/system/amdgpu.py @@ -33,7 +33,10 @@ from m5.objects import * def createGPU(system, args): shader = Shader( - n_wf=args.wfs_per_simd, timing=True, clk_domain=system.clk_domain + n_wf=args.wfs_per_simd, + cu_per_sqc=args.cu_per_sqc, + timing=True, + clk_domain=system.clk_domain, ) # VIPER GPU protocol implements release consistency at GPU side. So, @@ -84,6 +87,7 @@ def createGPU(system, args): vrfs = [] vrf_pool_mgrs = [] srfs = [] + rfcs = [] srf_pool_mgrs = [] for j in range(args.simds_per_cu): for k in range(shader.n_wf): @@ -133,10 +137,16 @@ def createGPU(system, args): num_regs=args.sreg_file_size, ) ) + rfcs.append( + RegisterFileCache( + simd_id=j, cache_size=args.register_file_cache_size + ) + ) compute_units[-1].wavefronts = wavefronts compute_units[-1].vector_register_file = vrfs compute_units[-1].scalar_register_file = srfs + compute_units[-1].register_file_cache = rfcs compute_units[-1].register_manager = RegisterManager( policy=args.registerManagerPolicy, vrf_pool_managers=vrf_pool_mgrs, @@ -181,10 +191,14 @@ def connectGPU(system, args): system.pc.south_bridge.gpu.DeviceID = 0x740F system.pc.south_bridge.gpu.SubsystemVendorID = 0x1002 system.pc.south_bridge.gpu.SubsystemID = 0x0C34 + elif args.gpu_device == "MI300X": + system.pc.south_bridge.gpu.DeviceID = 0x740F + system.pc.south_bridge.gpu.SubsystemVendorID = 0x1002 + 
system.pc.south_bridge.gpu.SubsystemID = 0x0C34 elif args.gpu_device == "Vega10": system.pc.south_bridge.gpu.DeviceID = 0x6863 else: - panic(f"Unknown GPU device: {args.gpu_device}") + m5.util.panic(f"Unknown GPU device: {args.gpu_device}") # Use the gem5 default of 0x280 OR'd with 0x10 which tells Linux there is # a PCI capabilities list to travse. diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py index 2803e10fb4..1322650964 100644 --- a/configs/example/gpufs/system/system.py +++ b/configs/example/gpufs/system/system.py @@ -108,18 +108,26 @@ def makeGpuFSSystem(args): system.cpu.append(shader) # This arbitrary address is something in the X86 I/O hole - hsapp_gpu_map_paddr = 0xE00000000 + hsapp_gpu_map_paddr = 0xE0000000 hsapp_pt_walker = VegaPagetableWalker() gpu_hsapp = HSAPacketProcessor( pioAddr=hsapp_gpu_map_paddr, numHWQueues=args.num_hw_queues, walker=hsapp_pt_walker, ) - dispatcher_exit_events = True if args.exit_at_gpu_kernel > -1 else False + dispatcher_exit_events = False + if args.exit_at_gpu_task > -1: + dispatcher_exit_events = True + if args.exit_after_gpu_kernel > -1: + dispatcher_exit_events = True dispatcher = GPUDispatcher(kernel_exit_events=dispatcher_exit_events) cp_pt_walker = VegaPagetableWalker() + target_kernel = args.skip_until_gpu_kernel gpu_cmd_proc = GPUCommandProcessor( - hsapp=gpu_hsapp, dispatcher=dispatcher, walker=cp_pt_walker + hsapp=gpu_hsapp, + dispatcher=dispatcher, + walker=cp_pt_walker, + target_non_blit_kernel_id=target_kernel, ) shader.dispatcher = dispatcher shader.gpu_cmd_proc = gpu_cmd_proc @@ -153,7 +161,7 @@ def makeGpuFSSystem(args): 0x7D000, ] sdma_sizes = [0x1000] * 8 - elif args.gpu_device == "MI200": + elif args.gpu_device == "MI200" or args.gpu_device == "MI300X": num_sdmas = 5 sdma_bases = [ 0x4980, @@ -180,9 +188,15 @@ def makeGpuFSSystem(args): system.pc.south_bridge.gpu.sdmas = sdma_engines - # Setup PM4 packet processor - pm4_pkt_proc = PM4PacketProcessor() - 
system.pc.south_bridge.gpu.pm4_pkt_proc = pm4_pkt_proc + # Setup PM4 packet processors + pm4_procs = [] + pm4_procs.append( + PM4PacketProcessor( + ip_id=0, mmio_range=AddrRange(start=0xC000, end=0xD000) + ) + ) + + system.pc.south_bridge.gpu.pm4_pkt_procs = pm4_procs # GPU data path gpu_mem_mgr = AMDGPUMemoryManager() @@ -199,7 +213,8 @@ def makeGpuFSSystem(args): for sdma in sdma_engines: system._dma_ports.append(sdma) system._dma_ports.append(device_ih) - system._dma_ports.append(pm4_pkt_proc) + for pm4_proc in pm4_procs: + system._dma_ports.append(pm4_proc) system._dma_ports.append(system_hub) system._dma_ports.append(gpu_mem_mgr) system._dma_ports.append(hsapp_pt_walker) @@ -213,7 +228,8 @@ def makeGpuFSSystem(args): for sdma in sdma_engines: sdma.pio = system.iobus.mem_side_ports device_ih.pio = system.iobus.mem_side_ports - pm4_pkt_proc.pio = system.iobus.mem_side_ports + for pm4_proc in pm4_procs: + pm4_proc.pio = system.iobus.mem_side_ports system_hub.pio = system.iobus.mem_side_ports # Full system needs special TLBs for SQC, Scalar, and vector data ports @@ -247,7 +263,7 @@ def makeGpuFSSystem(args): 0x00000340, 0x00000000, 0x00000340, - 0x0000000F, + 0x00000000, 0x00000340, 0x00000000, 0x00000000, @@ -265,7 +281,7 @@ def makeGpuFSSystem(args): # See: https://sandpile.org/x86/cpuid.htm#level_0000_0001h # Enables AVX, OSXSAVE, XSAVE, POPCNT, SSE4.2, SSE4.1, CMPXCHG16B, # and FMA. - avx_cpu_features = [0x00020F51, 0x00000805, 0xEFDBFBFF, 0x1C983209] + avx_cpu_features = [0x00020F51, 0x00000805, 0xEFDBFBFF, 0x1C803209] for i, cpu in enumerate(system.cpu): # Break once we reach the shader "CPU" diff --git a/configs/example/gpufs/vega10.py b/configs/example/gpufs/vega10.py index ae74efd39b..9c3116d415 100644 --- a/configs/example/gpufs/vega10.py +++ b/configs/example/gpufs/vega10.py @@ -52,7 +52,7 @@ if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5." 
/sbin/m5 exit fi -modprobe -v amdgpu ip_block_mask=0xff ppfeaturemask=0 dpm=0 audio=0 +modprobe -v amdgpu ip_block_mask=0xdf ppfeaturemask=0 dpm=0 audio=0 echo "Running {} {}" echo "{}" | base64 -d > myapp chmod +x myapp diff --git a/configs/example/hsaTopology.py b/configs/example/hsaTopology.py index 2dcbdeca01..4540293482 100644 --- a/configs/example/hsaTopology.py +++ b/configs/example/hsaTopology.py @@ -243,7 +243,7 @@ def createVegaTopology(options): file_append((node_dir, "properties"), node_prop) - # Fiji HBM reporting + # Vega HBM reporting # TODO: Extract size, clk, and width from sim paramters mem_dir = joinpath(node_dir, "mem_banks/0") remake_dir(mem_dir) @@ -260,196 +260,7 @@ def createVegaTopology(options): file_append((mem_dir, "properties"), mem_prop) -# This fakes out a dGPU setup so the runtime correctly operations. The spoofed -# system has a single dGPU and a single socket CPU. Note that more complex -# topologies (multi-GPU, multi-socket CPUs) need to have a different setup -# here or the runtime won't be able to issue Memcpies from one node to another. -# -# TODO: There is way too much hardcoded here. It doesn't effect anything in -# our current ROCm stack (1.6), but it is highly possible that it will in the -# future. We might need to scrub through this and extract the appropriate -# fields from the simulator in the future. -def createFijiTopology(options): - topology_dir = joinpath( - m5.options.outdir, "fs/sys/devices/virtual/kfd/kfd/topology" - ) - remake_dir(topology_dir) - - amdgpu_dir = joinpath(m5.options.outdir, "fs/sys/module/amdgpu/parameters") - remake_dir(amdgpu_dir) - - # Fiji reported VM size in GB. Used to reserve an allocation from CPU - # to implement SVM (i.e. GPUVM64 pointers and X86 pointers agree) - file_append((amdgpu_dir, "vm_size"), 256) - - # Ripped from real Fiji platform to appease KMT version checks - file_append((topology_dir, "generation_id"), 2) - - # Set up system properties. 
Regiter as ast-rocm server - sys_prop = ( - "platform_oem 35498446626881\n" - + "platform_id 71791775140929\n" - + "platform_rev 2\n" - ) - file_append((topology_dir, "system_properties"), sys_prop) - - # Populate the topology tree - # Our dGPU system is two nodes. Node 0 is a CPU and Node 1 is a dGPU - node_dir = joinpath(topology_dir, "nodes/0") - remake_dir(node_dir) - - # Register as a CPU - file_append((node_dir, "gpu_id"), 0) - file_append((node_dir, "name"), "") - - # CPU links. Only thing that matters is we tell the runtime that GPU is - # connected through PCIe to CPU socket 0. - io_links = 1 - io_dir = joinpath(node_dir, "io_links/0") - remake_dir(io_dir) - io_prop = ( - "type 2\n" - + "version_major 0\n" - + "version_minor 0\n" - + "node_from 0\n" - + "node_to 1\n" - + "weight 20\n" - + "min_latency 0\n" - + "max_latency 0\n" - + "min_bandwidth 0\n" - + "max_bandwidth 0\n" - + "recommended_transfer_size 0\n" - + "flags 13\n" - ) - file_append((io_dir, "properties"), io_prop) - - # Populate CPU node properties - node_prop = ( - f"cpu_cores_count {options.num_cpus}\n" - + "simd_count 0\n" - + "mem_banks_count 1\n" - + "caches_count 0\n" - + f"io_links_count {io_links}\n" - + "cpu_core_id_base 0\n" - + "simd_id_base 0\n" - + "max_waves_per_simd 0\n" - + "lds_size_in_kb 0\n" - + "gds_size_in_kb 0\n" - + "wave_front_size 64\n" - + "array_count 0\n" - + "simd_arrays_per_engine 0\n" - + "cu_per_simd_array 0\n" - + "simd_per_cu 0\n" - + "max_slots_scratch_cu 0\n" - + "vendor_id 0\n" - + "device_id 0\n" - + "location_id 0\n" - + "drm_render_minor 0\n" - + "max_engine_clk_ccompute 3400\n" - ) - - file_append((node_dir, "properties"), node_prop) - - # CPU memory reporting - mem_dir = joinpath(node_dir, "mem_banks/0") - remake_dir(mem_dir) - # Heap type value taken from real system, heap type values: - # https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/blob/roc-4.0.x/include/hsakmttypes.h#L317 - mem_prop = ( - "heap_type 0\n" - + "size_in_bytes 
33704329216\n" - + "flags 0\n" - + "width 72\n" - + "mem_clk_max 2400\n" - ) - - file_append((mem_dir, "properties"), mem_prop) - - # Build the GPU node - node_dir = joinpath(topology_dir, "nodes/1") - remake_dir(node_dir) - - # Register as a Fiji - file_append((node_dir, "gpu_id"), 50156) - file_append((node_dir, "name"), "Fiji\n") - - # Should be the same as the render driver filename (dri/renderD) - drm_num = 128 - - # Real Fiji shows 96, but building that topology is complex and doesn't - # appear to be required for anything. - caches = 0 - - # GPU links. Only thing that matters is we tell the runtime that GPU is - # connected through PCIe to CPU socket 0. - io_links = 1 - io_dir = joinpath(node_dir, "io_links/0") - remake_dir(io_dir) - io_prop = ( - "type 2\n" - + "version_major 0\n" - + "version_minor 0\n" - + "node_from 1\n" - + "node_to 0\n" - + "weight 20\n" - + "min_latency 0\n" - + "max_latency 0\n" - + "min_bandwidth 0\n" - + "max_bandwidth 0\n" - + "recommended_transfer_size 0\n" - + "flags 1\n" - ) - file_append((io_dir, "properties"), io_prop) - - # Populate GPU node properties - node_prop = ( - "cpu_cores_count 0\n" - + f"simd_count {options.num_compute_units * options.simds_per_cu}\n" - + "mem_banks_count 1\n" - + f"caches_count {caches}\n" - + f"io_links_count {io_links}\n" - + "cpu_core_id_base 0\n" - + "simd_id_base 2147487744\n" - + f"max_waves_per_simd {options.wfs_per_simd}\n" - + f"lds_size_in_kb {int(options.lds_size / 1024)}\n" - + "gds_size_in_kb 0\n" - + f"wave_front_size {options.wf_size}\n" - + "array_count 4\n" - + f"simd_arrays_per_engine {options.sa_per_complex}\n" - + f"cu_per_simd_array {options.cu_per_sa}\n" - + f"simd_per_cu {options.simds_per_cu}\n" - + "max_slots_scratch_cu 32\n" - + "vendor_id 4098\n" - + "device_id 29440\n" - + "location_id 512\n" - + f"drm_render_minor {drm_num}\n" - + f"max_engine_clk_fcompute {int(toFrequency(options.gpu_clock) / 1000000.0)}\n" - + "local_mem_size 4294967296\n" - + "fw_version 730\n" - + 
"capability 4736\n" - + f"max_engine_clk_ccompute {int(toFrequency(options.CPUClock) / 1000000.0)}\n" - ) - - file_append((node_dir, "properties"), node_prop) - - # Fiji HBM reporting - # TODO: Extract size, clk, and width from sim paramters - mem_dir = joinpath(node_dir, "mem_banks/0") - remake_dir(mem_dir) - # Heap type value taken from real system, heap type values: - # https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/blob/roc-4.0.x/include/hsakmttypes.h#L317 - mem_prop = ( - "heap_type 1\n" - + "size_in_bytes 4294967296\n" - + "flags 0\n" - + "width 4096\n" - + "mem_clk_max 500\n" - ) - - file_append((mem_dir, "properties"), mem_prop) - - -def createCarrizoTopology(options): +def createRavenTopology(options): topology_dir = joinpath( m5.options.outdir, "fs/sys/devices/virtual/kfd/kfd/topology" ) @@ -476,7 +287,6 @@ def createCarrizoTopology(options): file_append((node_dir, "gpu_id"), 2765) gfx_dict = { - "gfx801": {"name": "Carrizo\n", "id": 39028}, "gfx902": {"name": "Raven\n", "id": 5597}, } diff --git a/configs/example/lupv/run_lupv.py b/configs/example/lupv/run_lupv.py index 4be6b924a5..6dc5d3526f 100644 --- a/configs/example/lupv/run_lupv.py +++ b/configs/example/lupv/run_lupv.py @@ -49,8 +49,8 @@ from gem5.utils.requires import requires # Run a check to ensure the right version of gem5 is being used. 
requires(isa_required=ISA.RISCV) -from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import ( - PrivateL1PrivateL2CacheHierarchy, +from gem5.components.cachehierarchies.classic.private_l1_private_l2_walk_cache_hierarchy import ( + PrivateL1PrivateL2WalkCacheHierarchy, ) parser = argparse.ArgumentParser(description="Runs Linux fs test with RISCV.") @@ -72,7 +72,7 @@ parser.add_argument( args = parser.parse_args() -cache_hierarchy = PrivateL1PrivateL2CacheHierarchy( +cache_hierarchy = PrivateL1PrivateL2WalkCacheHierarchy( l1d_size="32KiB", l1i_size="32KiB", l2_size="512KiB" ) @@ -98,8 +98,12 @@ board = LupvBoard( # Set the Full System workload. board.set_kernel_disk_workload( - kernel=obtain_resource("riscv-lupio-linux-kernel"), - disk_image=obtain_resource("riscv-lupio-busybox-img"), + kernel=obtain_resource( + "riscv-lupio-linux-kernel", resource_version="1.0.0" + ), + disk_image=obtain_resource( + "riscv-lupio-busybox-img", resource_version="1.0.0" + ), ) diff --git a/configs/example/riscv/fs_linux.py b/configs/example/riscv/fs_linux.py index c0643c883d..246a1b24d9 100644 --- a/configs/example/riscv/fs_linux.py +++ b/configs/example/riscv/fs_linux.py @@ -145,7 +145,17 @@ Options.addFSOptions(parser) parser.add_argument( "--virtio-rng", action="store_true", help="Enable VirtIORng device" ) - +parser.add_argument( + "--semihosting", + action="store_true", + help="Enable the RISC-V semihosting interface", +) +parser.add_argument( + "--semihosting-root", + default="/some/invalid/root/directory", + type=str, + help="The root directory for files exposed to semihosting", +) # ---------------------------- Parse Options --------------------------- # args = parser.parse_args() @@ -168,11 +178,17 @@ mdesc = SysConfig( system.mem_mode = mem_mode system.mem_ranges = [AddrRange(start=0x80000000, size=mdesc.mem())] +workload_args = dict() +if args.semihosting: + workload_args["semihosting"] = RiscvSemihosting( + 
files_root_dir=args.semihosting_root, + cmd_line=args.kernel, + ) if args.bare_metal: - system.workload = RiscvBareMetal() + system.workload = RiscvBareMetal(**workload_args) system.workload.bootloader = args.kernel else: - system.workload = RiscvLinux() + system.workload = RiscvLinux(**workload_args) system.workload.object_file = args.kernel system.iobus = IOXBar() diff --git a/configs/nvm/sweep.py b/configs/nvm/sweep.py index d5d23ad76a..ab77768e08 100644 --- a/configs/nvm/sweep.py +++ b/configs/nvm/sweep.py @@ -59,7 +59,7 @@ nvm_generators = {"NVM": lambda x: x.createNvm} # Use a single-channel DDR3-1600 x64 (8x8 topology) by default parser.add_argument( - "--nvm-type", + "--mem-type", default="NVM_2400_1x64", choices=ObjectList.mem_list.get_names(), help="type of memory to use", @@ -212,7 +212,7 @@ def trace(): nbr_banks, bank, addr_map, - args.dram_ranks, + args.nvm_ranks, ) yield system.tgen.createExit(0) diff --git a/configs/nvm/sweep_hybrid.py b/configs/nvm/sweep_hybrid.py index 669f847eb1..82a4a6124e 100644 --- a/configs/nvm/sweep_hybrid.py +++ b/configs/nvm/sweep_hybrid.py @@ -143,7 +143,7 @@ MemConfig.config_mem(args, system) # the following assumes that we are using the native controller # with NVM and DRAM interfaces, check to be sure -if not isinstance(system.mem_ctrls[0], m5.objects.HeteroMemCtrl): +if not isinstance(system.mem_ctrls[0], m5.objects.MemCtrl): fatal("This script assumes the controller is a HeteroMemCtrl subclass") if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface): fatal("This script assumes the first memory is a DRAMInterface subclass") diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py index 0818b7f0eb..c10ccac647 100644 --- a/configs/ruby/GPU_VIPER.py +++ b/configs/ruby/GPU_VIPER.py @@ -149,7 +149,8 @@ class TCPCache(RubyCache): self.size = MemorySize(options.tcp_size) self.assoc = options.tcp_assoc self.resourceStalls = options.no_tcc_resource_stalls - self.replacement_policy = TreePLRURP() + if 
hasattr(options, "tcp_rp"): + self.replacement_policy = RP_choose(options.tcp_rp) class TCPCntrl(TCP_Controller, CntrlBase): @@ -241,7 +242,8 @@ class SQCCache(RubyCache): def create(self, options): self.size = MemorySize(options.sqc_size) self.assoc = options.sqc_assoc - self.replacement_policy = TreePLRURP() + if hasattr(options, "sqc_rp"): + self.replacement_policy = RP_choose(options.sqc_rp) class SQCCntrl(SQC_Controller, CntrlBase): @@ -303,7 +305,8 @@ class TCC(RubyCache): self.start_index_bit = math.log(options.cacheline_size, 2) + math.log( options.num_tccs, 2 ) - self.replacement_policy = TreePLRURP() + if hasattr(options, "tcc_rp"): + self.replacement_policy = RP_choose(options.tcc_rp) class TCCCntrl(TCC_Controller, CntrlBase): @@ -497,13 +500,6 @@ def define_options(parser): parser.add_argument( "--noL1", action="store_true", default=False, help="bypassL1" ) - parser.add_argument( - "--scalar-buffer-size", - type=int, - default=128, - help="Size of the mandatory queue in the GPU scalar " - "cache controller", - ) parser.add_argument( "--glc-atomic-latency", type=int, default=1, help="GLC Atomic Latency" ) @@ -841,9 +837,7 @@ def construct_scalars(options, system, ruby_system, network): scalar_cntrl.responseToSQC = MessageBuffer(ordered=True) scalar_cntrl.responseToSQC.in_port = network.out_port - scalar_cntrl.mandatoryQueue = MessageBuffer( - buffer_size=options.scalar_buffer_size - ) + scalar_cntrl.mandatoryQueue = MessageBuffer() return (scalar_sequencers, scalar_cntrl_nodes) @@ -1133,3 +1127,28 @@ def create_system( ruby_system.network.number_of_virtual_networks = 11 return (cpu_sequencers, dir_cntrl_nodes, mainCluster) + + +def RP_choose(test_name): + if test_name == "TreePLRURP": + replacement_policy = TreePLRURP() + elif test_name == "LRURP": + replacement_policy = LRURP() + elif test_name == "FIFORP": + replacement_policy = FIFORP() + elif test_name == "LFURP": + replacement_policy = LFURP() + elif test_name == "LIPRP": + replacement_policy = 
LIPRP() + elif test_name == "MRURP": + replacement_policy = MRURP() + elif test_name == "NRURP": + replacement_policy = NRURP() + elif test_name == "RRIPRP": + replacement_policy = RRIPRP() + elif test_name == "SecondChanceRP": + replacement_policy = SecondChanceRP() + elif test_name == "SHiPMemRP": + replacement_policy = SHiPMemRP() + + return replacement_policy diff --git a/ext/softfloat/softfloat_types.h b/ext/softfloat/softfloat_types.h index af1888f9b9..5123cd39c6 100644 --- a/ext/softfloat/softfloat_types.h +++ b/ext/softfloat/softfloat_types.h @@ -47,6 +47,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | the types below may, if desired, be defined as aliases for the native types | (typically 'float' and 'double', and possibly 'long double'). *----------------------------------------------------------------------------*/ +typedef struct { uint8_t v; } float8_t; typedef struct { uint16_t v; } float16_t; typedef struct { uint32_t v; } float32_t; typedef struct { uint64_t v; } float64_t; diff --git a/ext/systemc/SConscript b/ext/systemc/SConscript index 5248fc32d9..d6c3f3f135 100644 --- a/ext/systemc/SConscript +++ b/ext/systemc/SConscript @@ -25,13 +25,16 @@ import os from m5.util.terminal import get_termcap -import gem5_scons +import sys Import('env') systemc = env.Clone() build_root = Dir('.').abspath src_root = Dir('.').srcdir.abspath +gem5_root = Dir('#../..').srcnode().abspath +sys.path.append(os.path.join(gem5_root, 'site_scons')) +import gem5_scons systemc.Prepend(CPPPATH=Dir('./src').srcnode()) systemc.Prepend(CPATH=Dir('./src')) diff --git a/ext/testlib/configuration.py b/ext/testlib/configuration.py index 60c0c17654..cebf493add 100644 --- a/ext/testlib/configuration.py +++ b/ext/testlib/configuration.py @@ -245,7 +245,6 @@ def define_constants(constants): constants.isa_tag_type = "isa" constants.x86_tag = "X86" - constants.gcn3_x86_tag = "GCN3_X86" constants.vega_x86_tag = "VEGA_X86" constants.sparc_tag = "SPARC" constants.riscv_tag 
= "RISCV" @@ -274,7 +273,6 @@ def define_constants(constants): constants.supported_tags = { constants.isa_tag_type: ( constants.x86_tag, - constants.gcn3_x86_tag, constants.vega_x86_tag, constants.sparc_tag, constants.riscv_tag, @@ -305,7 +303,6 @@ def define_constants(constants): constants.target_host = { constants.arm_tag: (constants.host_arm_tag,), constants.x86_tag: (constants.host_x86_64_tag,), - constants.gcn3_x86_tag: (constants.host_x86_64_tag,), constants.vega_x86_tag: (constants.host_x86_64_tag,), constants.sparc_tag: (constants.host_x86_64_tag,), constants.riscv_tag: (constants.host_x86_64_tag,), diff --git a/optional-requirements.txt b/optional-requirements.txt index f88787df1f..d69c960fa1 100644 --- a/optional-requirements.txt +++ b/optional-requirements.txt @@ -1 +1 @@ -tqdm==4.64.1 +tqdm==4.66.4 diff --git a/requirements.txt b/requirements.txt index 4b820f51ba..9a1748f82a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -mypy==1.5.1 -pre-commit==2.20.0 +mypy==1.10.0 +pre-commit==3.7.1 diff --git a/site_scons/gem5_scons/configure.py b/site_scons/gem5_scons/configure.py index c1b9fb56cc..c1d3f8f0e0 100644 --- a/site_scons/gem5_scons/configure.py +++ b/site_scons/gem5_scons/configure.py @@ -59,13 +59,15 @@ def CheckCxxFlag(context, flag, autoadd=True): return ret -def CheckLinkFlag(context, flag, autoadd=True, set_for_shared=True): +def CheckLinkFlag(context, flag, autoadd=True, set_for_shared=True, code=None): context.Message(f"Checking for linker {flag} support... 
") last_linkflags = context.env["LINKFLAGS"] context.env.Append(LINKFLAGS=[flag]) pre_werror = context.env["LINKFLAGS"] context.env.Append(LINKFLAGS=["-Werror"]) - ret = context.TryLink("int main(int, char *[]) { return 0; }", ".cc") + if not code: + code = "int main(int, char *[]) { return 0; }" + ret = context.TryLink(code, ".cc") context.env["LINKFLAGS"] = pre_werror if not (ret and autoadd): context.env["LINKFLAGS"] = last_linkflags diff --git a/src/Doxyfile b/src/Doxyfile index 68d9b3b44b..2206f17669 100644 --- a/src/Doxyfile +++ b/src/Doxyfile @@ -31,7 +31,7 @@ PROJECT_NAME = gem5 # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = v23.1.0.0 +PROJECT_NUMBER = v24.0.0.0 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. diff --git a/src/Kconfig b/src/Kconfig index 2d24aad1ad..268f7ca1c5 100644 --- a/src/Kconfig +++ b/src/Kconfig @@ -51,3 +51,4 @@ rsource "arch/Kconfig" rsource "cpu/Kconfig" rsource "systemc/Kconfig" rsource "gpu-compute/Kconfig" +rsource "test_objects/Kconfig" diff --git a/src/arch/SConscript b/src/arch/SConscript index 2426401d73..0607c7a47c 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -68,7 +68,7 @@ if env['CONF']['BUILD_ISA']: error("At least one ISA need to be set") -amdgpu_isa = ['gcn3', 'vega'] +amdgpu_isa = ['vega'] if env['CONF']['BUILD_GPU']: env.SwitchingHeaders( diff --git a/src/arch/amdgpu/Kconfig b/src/arch/amdgpu/Kconfig index 5140f2b103..38c3533eb8 100644 --- a/src/arch/amdgpu/Kconfig +++ b/src/arch/amdgpu/Kconfig @@ -29,5 +29,4 @@ prompt "GPU ISA" endchoice endif -rsource "gcn3/Kconfig" rsource "vega/Kconfig" diff --git a/src/arch/amdgpu/common/SConscript b/src/arch/amdgpu/common/SConscript index ffa5fcb5da..82f9f01d77 100644 --- a/src/arch/amdgpu/common/SConscript +++ b/src/arch/amdgpu/common/SConscript @@ -34,7 +34,7 @@ Import('*') if not 
env['CONF']['BUILD_GPU']: Return() -if env['CONF']['TARGET_GPU_ISA'] in ('gcn3', 'vega'): +if env['CONF']['TARGET_GPU_ISA'] in ('vega',): SimObject('X86GPUTLB.py', sim_objects=['X86GPUTLB', 'TLBCoalescer']) Source('tlb.cc') diff --git a/src/arch/amdgpu/common/dtype/README.md b/src/arch/amdgpu/common/dtype/README.md new file mode 100644 index 0000000000..02f1964fdb --- /dev/null +++ b/src/arch/amdgpu/common/dtype/README.md @@ -0,0 +1,21 @@ +# Microscaling Formats + +This directory defines [microscaling formats](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) which are reduced precision floating point formats. +The class makes some assumptions to simplify things and is not completely generic. +For example: +- Types must be smaller than 32-bits. +- Type conversions currently assume that either: + - The destination format exponent and mantissa bits are both greater or equal to the source format. + - OR the destination format exponent and mantissa are both less than or equal to the source format. + - In other words, one type cannot have larger exponent and smaller mantissa and vice versa. +- Basic MX operations are implementation defined, meaning MX types can be converted to FP32 for arithmetic + - This means that arithmetic operators need not be defined for MX types. +- Exponent and mantissa of zero is zero. There is no special case for the sign (i.e., -0 is not special). +- The spec does not differentiate between signaling and quiet NaN, therefore quiet NaN is used. 
+- New types must template specialize the following standard library methods: + - isinf(T) + - isnan(T) + - isnormal(T) +- New types must template specialize the following std::numeric_limits members / methods: + - has_infinity / infinity() + - has_quiet_NaN / quiet_NaN() diff --git a/src/arch/amdgpu/gcn3/SConsopts b/src/arch/amdgpu/common/dtype/SConscript similarity index 92% rename from src/arch/amdgpu/gcn3/SConsopts rename to src/arch/amdgpu/common/dtype/SConscript index edccf603fa..67a3849651 100644 --- a/src/arch/amdgpu/gcn3/SConsopts +++ b/src/arch/amdgpu/common/dtype/SConscript @@ -1,6 +1,4 @@ -# -*- mode:python -*- - -# Copyright (c) 2015, 2017 Advanced Micro Devices, Inc. +# Copyright (c) 2024 Advanced Micro Devices, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -31,4 +29,4 @@ Import('*') -main.Append(ALL_GPU_ISAS=['gcn3']) +GTest('mxfp.test', 'mxfp.test.cc') diff --git a/src/arch/amdgpu/common/dtype/binary32.hh b/src/arch/amdgpu/common/dtype/binary32.hh new file mode 100644 index 0000000000..441eed57ca --- /dev/null +++ b/src/arch/amdgpu/common/dtype/binary32.hh @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_BINARY32_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_BINARY32_HH__ + +namespace gem5 +{ + +namespace AMDGPU +{ + +// Same as IEEE 754 binary 32 - Microscaling types are converted to/from +// this format by default. For now as there do not seem to be any MI300 +// instructions operating directly on the types (i.e., they all cast to FP32 +// first and then perform arithmetic operations). +typedef union binary32_u +{ + enum bitSizes + { + ebits = 8, + mbits = 23, + sbits = 1, + bias = 127, + + inf = 0x7f800000, + nan = 0x7f800100, + max = 0x7f7fffff + }; + + uint32_t storage; + float fp32; + struct + { + unsigned mant : 23; + unsigned exp : 8; + unsigned sign : 1; + }; + + // To help with stdlib functions with T = float. 
+ operator float() const + { + return fp32; + } +} binary32; +static_assert(sizeof(binary32) == 4); + +} // namespace AMDGPU + +} // namespace gem5 + +namespace std +{ + +template<> +class numeric_limits +{ + public: + static constexpr bool has_quiet_NaN = true; + static gem5::AMDGPU::binary32 quiet_NaN() + { + gem5::AMDGPU::binary32 tmp; + tmp.fp32 = std::numeric_limits::quiet_NaN(); + return tmp; + } + + static constexpr bool has_infinity = true; + static gem5::AMDGPU::binary32 infinity() + { + gem5::AMDGPU::binary32 tmp; + tmp.fp32 = std::numeric_limits::infinity(); + return tmp; + } + + static gem5::AMDGPU::binary32 max() + { + gem5::AMDGPU::binary32 tmp; + tmp.fp32 = std::numeric_limits::max(); + return tmp; + } +}; + +} // namespace std + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_BINARY32_HH__ diff --git a/src/arch/amdgpu/common/dtype/fp16_e5m10.hh b/src/arch/amdgpu/common/dtype/fp16_e5m10.hh new file mode 100644 index 0000000000..363dcada12 --- /dev/null +++ b/src/arch/amdgpu/common/dtype/fp16_e5m10.hh @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_FP16_E5M10_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_FP16_E5M10_HH__ + +#include + +namespace gem5 +{ + +namespace AMDGPU +{ + +typedef union +{ + enum bitSizes + { + ebits = 5, + mbits = 10, + sbits = 1, + zbits = 16, + bias = 15, + + inf = 0x7c000000, + nan = 0x7c100000, + max = 0x7bff0000 + }; + + uint32_t storage; + struct + { + unsigned zero : zbits; + unsigned mant : mbits; + unsigned exp : ebits; + unsigned sign : sbits; + }; +} fp16_e5m10_info; +static_assert(sizeof(fp16_e5m10_info) == 4); + +} // namespace AMDGPU + +} // namespace gem5 + + +// std library cmath definitions +namespace std +{ + +constexpr bool isinf(gem5::AMDGPU::fp16_e5m10_info a) +{ + return a.exp == 0x1F && a.mant == 0; +} + +constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a) +{ + return a.exp == 0x1F && a.mant != 0; +} + +constexpr bool isnormal(gem5::AMDGPU::fp16_e5m10_info a) +{ + return !(a.exp == 0 && a.mant != 0); +} + +template<> +class numeric_limits +{ + public: + static constexpr bool has_quiet_NaN = true; + static gem5::AMDGPU::fp16_e5m10_info quiet_NaN() + { + assert(has_quiet_NaN); + gem5::AMDGPU::fp16_e5m10_info tmp; + tmp.storage = 
gem5::AMDGPU::fp16_e5m10_info::nan; + return tmp; + } + + static constexpr bool has_infinity = true; + static gem5::AMDGPU::fp16_e5m10_info infinity() + { + assert(has_infinity); + gem5::AMDGPU::fp16_e5m10_info tmp; + tmp.storage = gem5::AMDGPU::fp16_e5m10_info::inf; + return tmp; + } + + static gem5::AMDGPU::fp16_e5m10_info max() + { + gem5::AMDGPU::fp16_e5m10_info tmp; + tmp.storage = gem5::AMDGPU::fp16_e5m10_info::max; + return tmp; + } +}; + +} // namespace std + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_FP16_E5M10_HH__ diff --git a/src/arch/amdgpu/common/dtype/fp16_e8m7.hh b/src/arch/amdgpu/common/dtype/fp16_e8m7.hh new file mode 100644 index 0000000000..3c796fca51 --- /dev/null +++ b/src/arch/amdgpu/common/dtype/fp16_e8m7.hh @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_FP16_E8M7_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_FP16_E8M7_HH__ + +#include + +namespace gem5 +{ + +namespace AMDGPU +{ + +typedef union +{ + enum bitSizes + { + ebits = 8, + mbits = 7, + sbits = 1, + zbits = 16, + bias = 127, + + inf = 0x7f800000, + nan = 0x7f810000, + max = 0x7f7f0000 + }; + + uint32_t storage; + struct + { + unsigned zero : zbits; + unsigned mant : mbits; + unsigned exp : ebits; + unsigned sign : sbits; + }; +} fp16_e8m7_info; +static_assert(sizeof(fp16_e8m7_info) == 4); + +} // namespace AMDGPU + +} // namespace gem5 + + +// std library cmath definitions +namespace std +{ + +constexpr bool isinf(gem5::AMDGPU::fp16_e8m7_info a) +{ + return a.exp == 0xFF && a.mant == 0; +} + +constexpr bool isnan(gem5::AMDGPU::fp16_e8m7_info a) +{ + return a.exp == 0xFF && a.mant != 0; +} + +constexpr bool isnormal(gem5::AMDGPU::fp16_e8m7_info a) +{ + return !(a.exp == 0 && a.mant != 0); +} + +template<> +class numeric_limits +{ + public: + static constexpr bool has_quiet_NaN = true; + static gem5::AMDGPU::fp16_e8m7_info quiet_NaN() + { + assert(has_quiet_NaN); + gem5::AMDGPU::fp16_e8m7_info tmp; + tmp.storage = gem5::AMDGPU::fp16_e8m7_info::nan; + return tmp; + } + + static constexpr bool has_infinity = true; + static gem5::AMDGPU::fp16_e8m7_info infinity() + { + assert(has_infinity); + gem5::AMDGPU::fp16_e8m7_info tmp; + tmp.storage = gem5::AMDGPU::fp16_e8m7_info::inf; + return tmp; + 
} + + static gem5::AMDGPU::fp16_e8m7_info max() + { + gem5::AMDGPU::fp16_e8m7_info tmp; + tmp.storage = gem5::AMDGPU::fp16_e8m7_info::max; + return tmp; + } +}; + +} // namespace std + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_FP16_E8M7_HH__ diff --git a/src/arch/amdgpu/common/dtype/fp8_e4m3.hh b/src/arch/amdgpu/common/dtype/fp8_e4m3.hh new file mode 100644 index 0000000000..46d2685c00 --- /dev/null +++ b/src/arch/amdgpu/common/dtype/fp8_e4m3.hh @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_FP8_E4M3_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_FP8_E4M3_HH__ + +#include + +namespace gem5 +{ + +namespace AMDGPU +{ + +typedef union +{ + enum bitSizes + { + ebits = 4, + mbits = 3, + sbits = 1, + zbits = 24, + bias = 7, + + inf = (0x7f << zbits), + nan = (0xff << zbits), + max = (0x7f << zbits) + }; + + uint32_t storage; + struct + { + unsigned zero : zbits; + unsigned mant : mbits; + unsigned exp : ebits; + unsigned sign : sbits; + }; +} fp8_e4m3_info; +static_assert(sizeof(fp8_e4m3_info) == 4); + +} // namespace AMDGPU + +} // namespace gem5 + + +// std library cmath definitions +namespace std +{ + +// Inf not defined +constexpr bool isinf(gem5::AMDGPU::fp8_e4m3_info a) { return false; } + +constexpr bool isnan(gem5::AMDGPU::fp8_e4m3_info a) +{ + return a.exp == 0xF && a.mant == 0x7; +} + +constexpr bool isnormal(gem5::AMDGPU::fp8_e4m3_info a) +{ + return !(a.exp == 0 && a.mant != 0); +} + + +template<> +class numeric_limits +{ + public: + static constexpr bool has_quiet_NaN = true; + static gem5::AMDGPU::fp8_e4m3_info quiet_NaN() + { + assert(has_quiet_NaN); + gem5::AMDGPU::fp8_e4m3_info tmp; + tmp.storage = gem5::AMDGPU::fp8_e4m3_info::nan; + return tmp; + } + + static constexpr bool has_infinity = false; + static gem5::AMDGPU::fp8_e4m3_info infinity() + { + assert(has_infinity); + gem5::AMDGPU::fp8_e4m3_info tmp; + tmp.storage = gem5::AMDGPU::fp8_e4m3_info::inf; + return tmp; + } 
+ + static gem5::AMDGPU::fp8_e4m3_info max() + { + gem5::AMDGPU::fp8_e4m3_info tmp; + tmp.storage = gem5::AMDGPU::fp8_e4m3_info::max; + return tmp; + } +}; + +} // namespace std + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_FP8_E4M3_HH__ diff --git a/src/arch/amdgpu/common/dtype/fp8_e5m2.hh b/src/arch/amdgpu/common/dtype/fp8_e5m2.hh new file mode 100644 index 0000000000..9e1f5812d5 --- /dev/null +++ b/src/arch/amdgpu/common/dtype/fp8_e5m2.hh @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_FP8_E5M2_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_FP8_E5M2_HH__ + +#include + +namespace gem5 +{ + +namespace AMDGPU +{ + +typedef union +{ + enum bitSizes + { + ebits = 5, + mbits = 2, + sbits = 1, + zbits = 24, + bias = 15, + + inf = (0x7c << zbits), + nan = (0xff << zbits), + max = (0x7f << zbits) + }; + + uint32_t storage; + struct + { + unsigned zero : zbits; + unsigned mant : mbits; + unsigned exp : ebits; + unsigned sign : sbits; + }; +} fp8_e5m2_info; +static_assert(sizeof(fp8_e5m2_info) == 4); + +} // namespace AMDGPU + +} // namespace gem5 + + +// std library cmath definitions +namespace std +{ + +constexpr bool isinf(gem5::AMDGPU::fp8_e5m2_info a) +{ + return a.exp == 0x1F && a.mant == 0x0; +} + +constexpr bool isnan(gem5::AMDGPU::fp8_e5m2_info a) +{ + return a.exp == 0x1F && a.mant != 0x0; +} + +constexpr bool isnormal(gem5::AMDGPU::fp8_e5m2_info a) +{ + return !(a.exp == 0 && a.mant != 0); +} + +template<> +class numeric_limits +{ + public: + static constexpr bool has_quiet_NaN = true; + static gem5::AMDGPU::fp8_e5m2_info quiet_NaN() + { + assert(has_quiet_NaN); + gem5::AMDGPU::fp8_e5m2_info tmp; + tmp.storage = gem5::AMDGPU::fp8_e5m2_info::nan; + return tmp; + } + + static constexpr bool has_infinity = true; + static gem5::AMDGPU::fp8_e5m2_info infinity() + { + assert(has_infinity); + gem5::AMDGPU::fp8_e5m2_info tmp; + tmp.storage = gem5::AMDGPU::fp8_e5m2_info::inf; + return 
tmp; + } + + static gem5::AMDGPU::fp8_e5m2_info max() + { + gem5::AMDGPU::fp8_e5m2_info tmp; + tmp.storage = gem5::AMDGPU::fp8_e5m2_info::max; + return tmp; + } +}; + +} // namespace std + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_FP8_E5M2_HH__ diff --git a/src/arch/amdgpu/common/dtype/mxfp.hh b/src/arch/amdgpu/common/dtype/mxfp.hh new file mode 100644 index 0000000000..d7edb32dbf --- /dev/null +++ b/src/arch/amdgpu/common/dtype/mxfp.hh @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_MXFP_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_MXFP_HH__ + +#include +#include +#include + +#include "arch/amdgpu/common/dtype/mxfp_convert.hh" + +namespace gem5 +{ + +namespace AMDGPU +{ + +// Base class for all microscaling types. The sizes of everything are +// determined by the enum fields in the FMT struct. All of these share the +// same operator overloads which convert to float before arithmetic and +// convert back if assigned to a microscaling type. +template +class mxfp +{ + public: + mxfp() = default; + mxfp(float f) : mode(roundTiesToEven) + { + data = float_to_mxfp(f); + } + + // Set raw bits, used by gem5 to set a raw value read from VGPRs. + mxfp(const uint32_t& raw) + { + // The info unions end up being "left" aligned. For example, in FP4 + // only the bits 31:28 are used. Shift the input by the storage size + // of 32 by the type size (sign + exponent + mantissa bits). 
+ data = raw; + data <<= (32 - int(FMT::sbits) - int(FMT::ebits) - int(FMT::mbits)); + } + + mxfp(const mxfp& f) + { + FMT conv_out; + conv_out = convertMXFP(f.getFmt()); + data = conv_out.storage; + } + + mxfp& + operator=(const float& f) + { + data = float_to_mxfp(f); + return *this; + } + + mxfp& + operator=(const mxfp& f) + { + FMT conv_out; + conv_out = convertMXFP(f.getFmt()); + data = conv_out.storage; + return *this; + } + + operator float() const + { + binary32 out; + FMT in; + in.storage = data; + out = convertMXFP(in, mode); + + return out.fp32; + } + + constexpr static int + size() + { + return int(FMT::mbits) + int(FMT::ebits) + int(FMT::sbits); + } + + // Intentionally use storage > size() so that a storage type is not needed + // as a template parameter. + uint32_t data = 0; + + FMT + getFmt() const + { + FMT out; + out.storage = data; + return out; + } + + void + setFmt(FMT in) + { + data = in.storage; + } + + void + scale(const float& f) + { + binary32 bfp; + bfp.fp32 = f; + int scale_val = bfp.exp - bfp.bias; + + // Scale value of 0xFF is NaN. Scaling by NaN returns NaN. + // In this implementation, types without NaN define it as zero. 
+ if (scale_val == 0xFF) { + data = FMT::nan; + return; + } + + FMT in = getFmt(); + int exp = in.exp; + + if (exp + scale_val > max_exp()) { + in.exp = max_exp(); + } else if (exp + scale_val < min_exp()) { + in.exp = min_exp(); + } else { + in.exp = exp + scale_val; + } + + data = in.storage; + } + + private: + mxfpRoundingMode mode = roundTiesToEven; + + uint32_t + float_to_mxfp(float f) + { + if (std::isinf(f)) { + assert(std::numeric_limits::has_infinity); + return FMT::inf; + } + + if (std::isnan(f)) { + assert(std::numeric_limits::has_quiet_NaN); + return FMT::nan; + } + + return float_to_mxfp_nocheck(f); + } + + uint32_t + float_to_mxfp_nocheck(float f) + { + binary32 in; + in.fp32 = f; + + FMT out; + out.storage = 0; + + out = convertMXFP(in, mode); + + return out.storage; + } +}; + +// Unary operators +template +inline T operator+(T a) +{ + return a; +} + +template +inline T operator-(T a) +{ + // Flip sign bit + a.data ^= 0x80000000; + return a; +} + +template +inline T operator++(T a) +{ + a = a + T(1.0f); + return a; +} + +template +inline T operator--(T a) +{ + a = a - T(1.0f); + return a; +} + +template +inline T operator++(T a, int) +{ + T original = a; + ++a; + return original; +} + +template +inline T operator--(T a, int) +{ + T original = a; + --a; + return original; +} + +// Math operators +template +inline T operator+(T a, T b) +{ + return T(float(a) + float(b)); +} + +template +inline T operator-(T a, T b) +{ + return T(float(a) - float(b)); +} + +template +inline T operator*(T a, T b) +{ + return T(float(a) * float(b)); +} + +template +inline T operator/(T a, T b) +{ + return T(float(a) / float(b)); +} + +template +inline T operator+=(T &a, T b) +{ + a = a + b; + return a; +} + +template +inline T operator-=(T &a, T b) +{ + a = a - b; + return a; +} + +template +inline T operator*=(T &a, T b) +{ + a = a * b; + return a; +} + +template +inline T operator/=(T &a, T b) +{ + a = a / b; + return a; +} + +// Comparison operators +template +inline 
bool operator<(T a, T b) +{ + return float(a) < float(b); +} + +template +inline bool operator>(T a, T b) +{ + return float(a) > float(b); +} + +template +inline bool operator<=(T a, T b) +{ + return float(a) <= float(b); +} + +template +inline bool operator>=(T a, T b) +{ + return float(a) >= float(b); +} + +template +inline bool operator==(T a, T b) +{ + return float(a) == float(b); +} + +template +inline bool operator!=(T a, T b) +{ + return float(a) != float(b); +} + +} // namespace AMDGPU + +} // namespace gem5 + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_MXFP_HH__ diff --git a/src/arch/amdgpu/common/dtype/mxfp.test.cc b/src/arch/amdgpu/common/dtype/mxfp.test.cc new file mode 100644 index 0000000000..ca7b2fac60 --- /dev/null +++ b/src/arch/amdgpu/common/dtype/mxfp.test.cc @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "arch/amdgpu/common/dtype/mxfp_types.hh" + +template +bool test_raw_mxfp(T raw_mxfp, int bits) +{ + float tmp = float(raw_mxfp); + T from_float(tmp); + + // Simply check that casting to float and back yields the same bit values. + // Exclude inf/NaN as those have multiple values in some MXFP types. + if (raw_mxfp.data != from_float.data && + !std::isnan(tmp) && !std::isinf(tmp)) { + return false; + } + + return true; +} + +template +int test_type(int bits) +{ + T raw_mxfp; + int errors = 0; + + int max_val = 1 << bits; + for (int val = 0; val < max_val; ++val) { + // Raw data is aligned to MSb in MXFP types. Shift into place. 
+ raw_mxfp.data = val << (32 - bits); + if (!test_raw_mxfp(raw_mxfp, bits)) { + errors++; + } + } + + return errors; +} + +TEST(MxfpTest, MxBf16Test) +{ + using T = gem5::AMDGPU::mxbfloat16; + + int errors = test_type(T::size()); + + EXPECT_EQ(errors, 0); +} + +TEST(MxfpTest, MxFp16Test) +{ + using T = gem5::AMDGPU::mxfloat16; + + int errors = test_type(T::size()); + + EXPECT_EQ(errors, 0); +} + +TEST(MxfpTest, MxBf8Test) +{ + using T = gem5::AMDGPU::mxbfloat8; + + int errors = test_type(T::size()); + + EXPECT_EQ(errors, 0); +} + +TEST(MxfpTest, MxFp8Test) +{ + using T = gem5::AMDGPU::mxfloat8; + + int errors = test_type(T::size()); + + EXPECT_EQ(errors, 0); +} diff --git a/src/arch/amdgpu/common/dtype/mxfp_convert.hh b/src/arch/amdgpu/common/dtype/mxfp_convert.hh new file mode 100644 index 0000000000..641d5f5732 --- /dev/null +++ b/src/arch/amdgpu/common/dtype/mxfp_convert.hh @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_MXFP_CONVERT_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_MXFP_CONVERT_HH__ + +#include + +#include "arch/amdgpu/common/dtype/mxfp_type_info.hh" +#include "base/bitfield.hh" + +namespace gem5 +{ + +namespace AMDGPU +{ + +// The various rounding modes for microscaling formats. roundTiesToEven must +// be supported. Other rounding modes may be supported. +enum mxfpRoundingMode +{ + roundTiesToEven, + roundStochastic +}; + +// Conversion functions - For instructions that convert from one microscaling +// format to another. We only need the conversion functions as there do not +// appear to be any instructions yet which operate directly on the MX formats. +// +// in - An MXFP info struct type +// mode - rounding mode +// seed - input value for stochastic rounding function +template +dFMT convertMXFP(sFMT in, mxfpRoundingMode mode = roundTiesToEven, + uint32_t seed = 0) +{ + // We assume that *both* exponent and mantissa bits are both >= or <= + // the target type. Checkable at compile time. + // + // This is not necessarily a limitation, others just are not implemented. + // Figuring this out would be interesting for converting FP8 <-> BF8 for + // example. So far all GPU conversion instructions convert explicitly to + // a larger type from a smaller type or smaller to larger. 
+ static_assert(((int(sFMT::mbits) >= int(dFMT::mbits)) && + (int(sFMT::ebits) >= int(dFMT::ebits))) + || ((int(sFMT::mbits) <= int(dFMT::mbits)) && + (int(sFMT::ebits) <= int(dFMT::ebits)))); + + dFMT out; + out.storage = 0; + + if (int(sFMT::mbits) >= int(dFMT::mbits) && + int(sFMT::ebits) >= int(dFMT::ebits)) { + // Input format is larger, truncate and round mantissa. MX formats + // are subnormal if exp == 0. Zero out exp in that case. + + if (std::isnan(in)) { + // For types with no NaN return max value. + if (std::numeric_limits::has_quiet_NaN) { + out = std::numeric_limits::quiet_NaN(); + } else { + out = std::numeric_limits::max(); + } + } else if (std::isinf(in)) { + // For types with no Inf return max value. + if (std::numeric_limits::has_infinity) { + out = std::numeric_limits::infinity(); + } else { + out = std::numeric_limits::max(); + } + } else if (in.mant == 0 && in.exp == 0) { + // All MX formats FP32, and FP64 encode 0 as all zeros. Keep sign. + out.mant = 0; + out.exp = 0; + out.sign = in.sign; + } else { + // Extra bits are needed for the mantissa conversion. + uint32_t mant = in.mant & mask(sFMT::mbits); + int32_t exp = in.exp - sFMT::bias + dFMT::bias; + out.sign = in.sign; + + // Input is not subnormal, add the implicit 1 bit. + if (in.exp) { + mant |= (1 << sFMT::mbits); + } + + mant >>= (sFMT::mbits - dFMT::mbits); + + // Output became subnormal + if (exp < 1) { + int shift = 1 - exp; + mant >>= shift; + out.exp = 0; + } else { + out.exp = exp; + } + + mant &= mask(dFMT::mbits); + out.mant = mant; + + // roundTiesToEven is the only required rounding mode for MXFP + // types. Here we take the original mantissa and check the final + // bit which is shifted out when converting the mantissa. If that + // value is one, then we should round up to the next representable + // number. 
If the value is one and all other discarded mantissa + // bits are zero, round towards the number which has an even (0) + // bit value in the least significant mantissa bit. + // + // For denormals, the process is similar however we check the nth + // bit of the converted mantissa, where n is the absolute value of + // the converted exponent. If the value of |exp| is larger than + // the max exponent, round to zero. If it is exactly equal, always + // round up. + // + // If the number of destination and source format mantissa bits are + // the same, the mantissa is unchanged. + if (int(sFMT::mbits) > int(dFMT::mbits) + && mode == roundTiesToEven) { + bool round_up = false; + + int check_shift = sFMT::mbits - dFMT::mbits - 1; + uint32_t check_mant = in.mant & mask(sFMT::mbits); + + check_mant >>= check_shift; + + // out.exp == 0 means subnormal + if (out.exp == 0) { + check_mant = in.mant >> (sFMT::mbits - dFMT::mbits); + + uint32_t max_exp = mask(dFMT::ebits); + if (-exp > max_exp) { + // if exp < -(1 << dFMT::ebits), result should be 0 + round_up = false; + } else if (-exp == max_exp) { + // if exp == -(1 << dFMT::ebits), round up + round_up = true; + } else { + // Use the |exp|'th bit to determine rounding + int check_bit = 1 << -exp; + round_up = (check_mant & check_bit); + } + } else { + round_up = (check_mant & 0x1); + } + + // For roundTiesToEven, if we are exactly between two + // representable numbers, pick the one with an even least + // significant mantissa bit. We are exactly between when + // all of the discarded mantissa bits are 0 (i.e., !sticky). 
+ int sticky = in.mant & mask(sFMT::mbits - dFMT::mbits); + if (round_up && !sticky) { + if (!(out.mant & 1)) { + round_up = false; + } + } + + if (round_up) { + if (out.mant == mask(dFMT::mbits)) { + // mantissa at max value, increment exponent if not inf + if (out.exp != mask(dFMT::ebits)) { + out.exp++; + } + out.mant = 0; + } else { + out.mant++; + } + } + } else if (int(sFMT::mbits) > int(dFMT::mbits) + && mode == roundStochastic) { + // Use the discarded mantissa divided by the max mantissa of + // the source format to determine the probability of rounding + // up. An alternate implementation of this would be to get a + // random number and add that to the input mantissa. Then + // follow the normal rounding path above. + uint32_t discarded = in.mant & mask(sFMT::mbits - dFMT::mbits); + uint32_t max_mant = mask(sFMT::mbits); + + float round_prob = float(discarded) / float(max_mant); + + // Use a stochastic rounding function with the seed value to + // determine compare probability. This is implemented as a + // "Galois LFSR." + auto srFunc = [](uint32_t in) { + uint32_t bit = (in ^ (in >> 1) ^ (in >> 3) ^ (in >> 12)); + return (in >> 1) | (bit << 15); + }; + + // Assume stochastic rounding returns up to max uint32_t. + // This will return an FP value between 0.0f and 1.0f. + float draw_prob = float(srFunc(seed)) + / float(std::numeric_limits::max()); + + // Round up if the number we drew is less than the rounding + // probability. E.g., if round_prob is 90% (0.9) we choose + // values 0.0f - 0.90f to round up. + if (round_prob >= draw_prob) { + if (out.mant == mask(dFMT::mbits)) { + // mantissa at max value, increment exponent if not inf + if (out.exp != mask(dFMT::ebits)) { + out.exp++; + } + out.mant = 0; + } else { + out.mant++; + } + } + } + } + } else if (int(sFMT::mbits) <= int(dFMT::mbits) && + int(sFMT::ebits) <= int(dFMT::ebits)) { + // Input format is smaller. Extend mantissa / exponent and pad with 0. 
+ // Should be the same for all non-stochastic rounding modes. + + if (std::isnan(in)) { + // For types with no NaN return max value. + if (std::numeric_limits::has_quiet_NaN) { + out = std::numeric_limits::quiet_NaN(); + } else { + out = std::numeric_limits::max(); + } + } else if (std::isinf(in)) { + // For types with no Inf return max value. + if (std::numeric_limits::has_infinity) { + out = std::numeric_limits::infinity(); + } else { + out = std::numeric_limits::max(); + } + } else if (in.mant == 0 && in.exp == 0) { + // All MX formats FP32, and FP64 encode 0 as all zeros. Keep sign. + out.mant = 0; + out.exp = 0; + out.sign = in.sign; + } else { + out.mant = in.mant << (dFMT::mbits - sFMT::mbits); + out.exp = in.exp + dFMT::bias - sFMT::bias; + out.sign = in.sign; + + // Normalize input denormals + if (!in.exp && int(sFMT::ebits) != int(dFMT::ebits)) { + uint32_t m = out.mant; + if (m != 0) { + out.exp++; + while (!(m >> dFMT::mbits)) { + m <<= 1; + out.exp--; + } + out.mant = m & mask(dFMT::mbits); + } + } else if (!in.exp) { + // Exponent is the same, but output is not denorm, so add + // implicit 1. This is specific mainly to bf16 -> f32. + uint32_t m = out.mant; + m <<= 1; + out.mant = m & mask(dFMT::mbits); + } + } + } else { + assert(false); + } + + return out; +} + +template +int min_exp() +{ + return 1; +} + +template +int max_exp() +{ + return (1 << FMT::ebits) - 1; +} + + +} // namespace AMDGPU + +} // namespace gem5 + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_MXFP_CONVERT_HH__ diff --git a/src/arch/amdgpu/common/dtype/mxfp_type_info.hh b/src/arch/amdgpu/common/dtype/mxfp_type_info.hh new file mode 100644 index 0000000000..fe433523d6 --- /dev/null +++ b/src/arch/amdgpu/common/dtype/mxfp_type_info.hh @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_MXFP_TYPE_INFO_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_MXFP_TYPE_INFO_HH__ + +#include "arch/amdgpu/common/dtype/binary32.hh" +#include "arch/amdgpu/common/dtype/fp16_e5m10.hh" +#include "arch/amdgpu/common/dtype/fp16_e8m7.hh" +#include "arch/amdgpu/common/dtype/fp8_e4m3.hh" +#include "arch/amdgpu/common/dtype/fp8_e5m2.hh" + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_MXFP_TYPE_INFO_HH__ diff --git a/src/arch/amdgpu/common/dtype/mxfp_types.hh b/src/arch/amdgpu/common/dtype/mxfp_types.hh new file mode 100644 index 0000000000..29155901d4 --- /dev/null +++ b/src/arch/amdgpu/common/dtype/mxfp_types.hh @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_MXFP_TYPES_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_MXFP_TYPES_HH__ + +#include "arch/amdgpu/common/dtype/mxfp.hh" + +namespace gem5 +{ +namespace AMDGPU +{ + +using mxbfloat8 = mxfp; +using mxfloat8 = mxfp; + +using mxbfloat16 = mxfp; +using mxfloat16 = mxfp; + +using mxfloat32 = mxfp; + +} +} + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_MXFP_TYPES_HH__ diff --git a/src/arch/amdgpu/gcn3/decoder.cc b/src/arch/amdgpu/gcn3/decoder.cc deleted file mode 100644 index c0fcc3a7dd..0000000000 --- a/src/arch/amdgpu/gcn3/decoder.cc +++ /dev/null @@ -1,10814 +0,0 @@ -/* - * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include - -#include "arch/amdgpu/gcn3/gpu_decoder.hh" -#include "arch/amdgpu/gcn3/insts/gpu_static_inst.hh" -#include "arch/amdgpu/gcn3/insts/instructions.hh" - -namespace gem5 -{ - -namespace Gcn3ISA -{ - Decoder::Decoder() - { - } // Decoder - - Decoder::~Decoder() - { - } // ~Decoder - - IsaDecodeMethod Decoder::tableDecodePrimary[] = { - &Decoder::decode_OP_VOP2__V_CNDMASK_B32, - &Decoder::decode_OP_VOP2__V_CNDMASK_B32, - &Decoder::decode_OP_VOP2__V_CNDMASK_B32, - &Decoder::decode_OP_VOP2__V_CNDMASK_B32, - &Decoder::decode_OP_VOP2__V_ADD_F32, - &Decoder::decode_OP_VOP2__V_ADD_F32, - &Decoder::decode_OP_VOP2__V_ADD_F32, - &Decoder::decode_OP_VOP2__V_ADD_F32, - &Decoder::decode_OP_VOP2__V_SUB_F32, - &Decoder::decode_OP_VOP2__V_SUB_F32, - &Decoder::decode_OP_VOP2__V_SUB_F32, - &Decoder::decode_OP_VOP2__V_SUB_F32, - &Decoder::decode_OP_VOP2__V_SUBREV_F32, - &Decoder::decode_OP_VOP2__V_SUBREV_F32, - &Decoder::decode_OP_VOP2__V_SUBREV_F32, - &Decoder::decode_OP_VOP2__V_SUBREV_F32, - &Decoder::decode_OP_VOP2__V_MUL_LEGACY_F32, - &Decoder::decode_OP_VOP2__V_MUL_LEGACY_F32, - &Decoder::decode_OP_VOP2__V_MUL_LEGACY_F32, - &Decoder::decode_OP_VOP2__V_MUL_LEGACY_F32, - 
&Decoder::decode_OP_VOP2__V_MUL_F32, - &Decoder::decode_OP_VOP2__V_MUL_F32, - &Decoder::decode_OP_VOP2__V_MUL_F32, - &Decoder::decode_OP_VOP2__V_MUL_F32, - &Decoder::decode_OP_VOP2__V_MUL_I32_I24, - &Decoder::decode_OP_VOP2__V_MUL_I32_I24, - &Decoder::decode_OP_VOP2__V_MUL_I32_I24, - &Decoder::decode_OP_VOP2__V_MUL_I32_I24, - &Decoder::decode_OP_VOP2__V_MUL_HI_I32_I24, - &Decoder::decode_OP_VOP2__V_MUL_HI_I32_I24, - &Decoder::decode_OP_VOP2__V_MUL_HI_I32_I24, - &Decoder::decode_OP_VOP2__V_MUL_HI_I32_I24, - &Decoder::decode_OP_VOP2__V_MUL_U32_U24, - &Decoder::decode_OP_VOP2__V_MUL_U32_U24, - &Decoder::decode_OP_VOP2__V_MUL_U32_U24, - &Decoder::decode_OP_VOP2__V_MUL_U32_U24, - &Decoder::decode_OP_VOP2__V_MUL_HI_U32_U24, - &Decoder::decode_OP_VOP2__V_MUL_HI_U32_U24, - &Decoder::decode_OP_VOP2__V_MUL_HI_U32_U24, - &Decoder::decode_OP_VOP2__V_MUL_HI_U32_U24, - &Decoder::decode_OP_VOP2__V_MIN_F32, - &Decoder::decode_OP_VOP2__V_MIN_F32, - &Decoder::decode_OP_VOP2__V_MIN_F32, - &Decoder::decode_OP_VOP2__V_MIN_F32, - &Decoder::decode_OP_VOP2__V_MAX_F32, - &Decoder::decode_OP_VOP2__V_MAX_F32, - &Decoder::decode_OP_VOP2__V_MAX_F32, - &Decoder::decode_OP_VOP2__V_MAX_F32, - &Decoder::decode_OP_VOP2__V_MIN_I32, - &Decoder::decode_OP_VOP2__V_MIN_I32, - &Decoder::decode_OP_VOP2__V_MIN_I32, - &Decoder::decode_OP_VOP2__V_MIN_I32, - &Decoder::decode_OP_VOP2__V_MAX_I32, - &Decoder::decode_OP_VOP2__V_MAX_I32, - &Decoder::decode_OP_VOP2__V_MAX_I32, - &Decoder::decode_OP_VOP2__V_MAX_I32, - &Decoder::decode_OP_VOP2__V_MIN_U32, - &Decoder::decode_OP_VOP2__V_MIN_U32, - &Decoder::decode_OP_VOP2__V_MIN_U32, - &Decoder::decode_OP_VOP2__V_MIN_U32, - &Decoder::decode_OP_VOP2__V_MAX_U32, - &Decoder::decode_OP_VOP2__V_MAX_U32, - &Decoder::decode_OP_VOP2__V_MAX_U32, - &Decoder::decode_OP_VOP2__V_MAX_U32, - &Decoder::decode_OP_VOP2__V_LSHRREV_B32, - &Decoder::decode_OP_VOP2__V_LSHRREV_B32, - &Decoder::decode_OP_VOP2__V_LSHRREV_B32, - &Decoder::decode_OP_VOP2__V_LSHRREV_B32, - 
&Decoder::decode_OP_VOP2__V_ASHRREV_I32, - &Decoder::decode_OP_VOP2__V_ASHRREV_I32, - &Decoder::decode_OP_VOP2__V_ASHRREV_I32, - &Decoder::decode_OP_VOP2__V_ASHRREV_I32, - &Decoder::decode_OP_VOP2__V_LSHLREV_B32, - &Decoder::decode_OP_VOP2__V_LSHLREV_B32, - &Decoder::decode_OP_VOP2__V_LSHLREV_B32, - &Decoder::decode_OP_VOP2__V_LSHLREV_B32, - &Decoder::decode_OP_VOP2__V_AND_B32, - &Decoder::decode_OP_VOP2__V_AND_B32, - &Decoder::decode_OP_VOP2__V_AND_B32, - &Decoder::decode_OP_VOP2__V_AND_B32, - &Decoder::decode_OP_VOP2__V_OR_B32, - &Decoder::decode_OP_VOP2__V_OR_B32, - &Decoder::decode_OP_VOP2__V_OR_B32, - &Decoder::decode_OP_VOP2__V_OR_B32, - &Decoder::decode_OP_VOP2__V_XOR_B32, - &Decoder::decode_OP_VOP2__V_XOR_B32, - &Decoder::decode_OP_VOP2__V_XOR_B32, - &Decoder::decode_OP_VOP2__V_XOR_B32, - &Decoder::decode_OP_VOP2__V_MAC_F32, - &Decoder::decode_OP_VOP2__V_MAC_F32, - &Decoder::decode_OP_VOP2__V_MAC_F32, - &Decoder::decode_OP_VOP2__V_MAC_F32, - &Decoder::decode_OP_VOP2__V_MADMK_F32, - &Decoder::decode_OP_VOP2__V_MADMK_F32, - &Decoder::decode_OP_VOP2__V_MADMK_F32, - &Decoder::decode_OP_VOP2__V_MADMK_F32, - &Decoder::decode_OP_VOP2__V_MADAK_F32, - &Decoder::decode_OP_VOP2__V_MADAK_F32, - &Decoder::decode_OP_VOP2__V_MADAK_F32, - &Decoder::decode_OP_VOP2__V_MADAK_F32, - &Decoder::decode_OP_VOP2__V_ADD_U32, - &Decoder::decode_OP_VOP2__V_ADD_U32, - &Decoder::decode_OP_VOP2__V_ADD_U32, - &Decoder::decode_OP_VOP2__V_ADD_U32, - &Decoder::decode_OP_VOP2__V_SUB_U32, - &Decoder::decode_OP_VOP2__V_SUB_U32, - &Decoder::decode_OP_VOP2__V_SUB_U32, - &Decoder::decode_OP_VOP2__V_SUB_U32, - &Decoder::decode_OP_VOP2__V_SUBREV_U32, - &Decoder::decode_OP_VOP2__V_SUBREV_U32, - &Decoder::decode_OP_VOP2__V_SUBREV_U32, - &Decoder::decode_OP_VOP2__V_SUBREV_U32, - &Decoder::decode_OP_VOP2__V_ADDC_U32, - &Decoder::decode_OP_VOP2__V_ADDC_U32, - &Decoder::decode_OP_VOP2__V_ADDC_U32, - &Decoder::decode_OP_VOP2__V_ADDC_U32, - &Decoder::decode_OP_VOP2__V_SUBB_U32, - 
&Decoder::decode_OP_VOP2__V_SUBB_U32, - &Decoder::decode_OP_VOP2__V_SUBB_U32, - &Decoder::decode_OP_VOP2__V_SUBB_U32, - &Decoder::decode_OP_VOP2__V_SUBBREV_U32, - &Decoder::decode_OP_VOP2__V_SUBBREV_U32, - &Decoder::decode_OP_VOP2__V_SUBBREV_U32, - &Decoder::decode_OP_VOP2__V_SUBBREV_U32, - &Decoder::decode_OP_VOP2__V_ADD_F16, - &Decoder::decode_OP_VOP2__V_ADD_F16, - &Decoder::decode_OP_VOP2__V_ADD_F16, - &Decoder::decode_OP_VOP2__V_ADD_F16, - &Decoder::decode_OP_VOP2__V_SUB_F16, - &Decoder::decode_OP_VOP2__V_SUB_F16, - &Decoder::decode_OP_VOP2__V_SUB_F16, - &Decoder::decode_OP_VOP2__V_SUB_F16, - &Decoder::decode_OP_VOP2__V_SUBREV_F16, - &Decoder::decode_OP_VOP2__V_SUBREV_F16, - &Decoder::decode_OP_VOP2__V_SUBREV_F16, - &Decoder::decode_OP_VOP2__V_SUBREV_F16, - &Decoder::decode_OP_VOP2__V_MUL_F16, - &Decoder::decode_OP_VOP2__V_MUL_F16, - &Decoder::decode_OP_VOP2__V_MUL_F16, - &Decoder::decode_OP_VOP2__V_MUL_F16, - &Decoder::decode_OP_VOP2__V_MAC_F16, - &Decoder::decode_OP_VOP2__V_MAC_F16, - &Decoder::decode_OP_VOP2__V_MAC_F16, - &Decoder::decode_OP_VOP2__V_MAC_F16, - &Decoder::decode_OP_VOP2__V_MADMK_F16, - &Decoder::decode_OP_VOP2__V_MADMK_F16, - &Decoder::decode_OP_VOP2__V_MADMK_F16, - &Decoder::decode_OP_VOP2__V_MADMK_F16, - &Decoder::decode_OP_VOP2__V_MADAK_F16, - &Decoder::decode_OP_VOP2__V_MADAK_F16, - &Decoder::decode_OP_VOP2__V_MADAK_F16, - &Decoder::decode_OP_VOP2__V_MADAK_F16, - &Decoder::decode_OP_VOP2__V_ADD_U16, - &Decoder::decode_OP_VOP2__V_ADD_U16, - &Decoder::decode_OP_VOP2__V_ADD_U16, - &Decoder::decode_OP_VOP2__V_ADD_U16, - &Decoder::decode_OP_VOP2__V_SUB_U16, - &Decoder::decode_OP_VOP2__V_SUB_U16, - &Decoder::decode_OP_VOP2__V_SUB_U16, - &Decoder::decode_OP_VOP2__V_SUB_U16, - &Decoder::decode_OP_VOP2__V_SUBREV_U16, - &Decoder::decode_OP_VOP2__V_SUBREV_U16, - &Decoder::decode_OP_VOP2__V_SUBREV_U16, - &Decoder::decode_OP_VOP2__V_SUBREV_U16, - &Decoder::decode_OP_VOP2__V_MUL_LO_U16, - &Decoder::decode_OP_VOP2__V_MUL_LO_U16, - 
&Decoder::decode_OP_VOP2__V_MUL_LO_U16, - &Decoder::decode_OP_VOP2__V_MUL_LO_U16, - &Decoder::decode_OP_VOP2__V_LSHLREV_B16, - &Decoder::decode_OP_VOP2__V_LSHLREV_B16, - &Decoder::decode_OP_VOP2__V_LSHLREV_B16, - &Decoder::decode_OP_VOP2__V_LSHLREV_B16, - &Decoder::decode_OP_VOP2__V_LSHRREV_B16, - &Decoder::decode_OP_VOP2__V_LSHRREV_B16, - &Decoder::decode_OP_VOP2__V_LSHRREV_B16, - &Decoder::decode_OP_VOP2__V_LSHRREV_B16, - &Decoder::decode_OP_VOP2__V_ASHRREV_I16, - &Decoder::decode_OP_VOP2__V_ASHRREV_I16, - &Decoder::decode_OP_VOP2__V_ASHRREV_I16, - &Decoder::decode_OP_VOP2__V_ASHRREV_I16, - &Decoder::decode_OP_VOP2__V_MAX_F16, - &Decoder::decode_OP_VOP2__V_MAX_F16, - &Decoder::decode_OP_VOP2__V_MAX_F16, - &Decoder::decode_OP_VOP2__V_MAX_F16, - &Decoder::decode_OP_VOP2__V_MIN_F16, - &Decoder::decode_OP_VOP2__V_MIN_F16, - &Decoder::decode_OP_VOP2__V_MIN_F16, - &Decoder::decode_OP_VOP2__V_MIN_F16, - &Decoder::decode_OP_VOP2__V_MAX_U16, - &Decoder::decode_OP_VOP2__V_MAX_U16, - &Decoder::decode_OP_VOP2__V_MAX_U16, - &Decoder::decode_OP_VOP2__V_MAX_U16, - &Decoder::decode_OP_VOP2__V_MAX_I16, - &Decoder::decode_OP_VOP2__V_MAX_I16, - &Decoder::decode_OP_VOP2__V_MAX_I16, - &Decoder::decode_OP_VOP2__V_MAX_I16, - &Decoder::decode_OP_VOP2__V_MIN_U16, - &Decoder::decode_OP_VOP2__V_MIN_U16, - &Decoder::decode_OP_VOP2__V_MIN_U16, - &Decoder::decode_OP_VOP2__V_MIN_U16, - &Decoder::decode_OP_VOP2__V_MIN_I16, - &Decoder::decode_OP_VOP2__V_MIN_I16, - &Decoder::decode_OP_VOP2__V_MIN_I16, - &Decoder::decode_OP_VOP2__V_MIN_I16, - &Decoder::decode_OP_VOP2__V_LDEXP_F16, - &Decoder::decode_OP_VOP2__V_LDEXP_F16, - &Decoder::decode_OP_VOP2__V_LDEXP_F16, - &Decoder::decode_OP_VOP2__V_LDEXP_F16, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::subDecode_OP_VOPC, - &Decoder::subDecode_OP_VOPC, - &Decoder::subDecode_OP_VOPC, - &Decoder::subDecode_OP_VOPC, - &Decoder::subDecode_OP_VOP1, - &Decoder::subDecode_OP_VOP1, - &Decoder::subDecode_OP_VOP1, - &Decoder::subDecode_OP_VOP1, - &Decoder::decode_OP_SOP2__S_ADD_U32, - &Decoder::decode_OP_SOP2__S_SUB_U32, - &Decoder::decode_OP_SOP2__S_ADD_I32, - &Decoder::decode_OP_SOP2__S_SUB_I32, - &Decoder::decode_OP_SOP2__S_ADDC_U32, - &Decoder::decode_OP_SOP2__S_SUBB_U32, - &Decoder::decode_OP_SOP2__S_MIN_I32, - &Decoder::decode_OP_SOP2__S_MIN_U32, - &Decoder::decode_OP_SOP2__S_MAX_I32, - &Decoder::decode_OP_SOP2__S_MAX_U32, - &Decoder::decode_OP_SOP2__S_CSELECT_B32, - &Decoder::decode_OP_SOP2__S_CSELECT_B64, - &Decoder::decode_OP_SOP2__S_AND_B32, - &Decoder::decode_OP_SOP2__S_AND_B64, - &Decoder::decode_OP_SOP2__S_OR_B32, - &Decoder::decode_OP_SOP2__S_OR_B64, - &Decoder::decode_OP_SOP2__S_XOR_B32, - &Decoder::decode_OP_SOP2__S_XOR_B64, - &Decoder::decode_OP_SOP2__S_ANDN2_B32, - &Decoder::decode_OP_SOP2__S_ANDN2_B64, - &Decoder::decode_OP_SOP2__S_ORN2_B32, - &Decoder::decode_OP_SOP2__S_ORN2_B64, - &Decoder::decode_OP_SOP2__S_NAND_B32, - 
&Decoder::decode_OP_SOP2__S_NAND_B64, - &Decoder::decode_OP_SOP2__S_NOR_B32, - &Decoder::decode_OP_SOP2__S_NOR_B64, - &Decoder::decode_OP_SOP2__S_XNOR_B32, - &Decoder::decode_OP_SOP2__S_XNOR_B64, - &Decoder::decode_OP_SOP2__S_LSHL_B32, - &Decoder::decode_OP_SOP2__S_LSHL_B64, - &Decoder::decode_OP_SOP2__S_LSHR_B32, - &Decoder::decode_OP_SOP2__S_LSHR_B64, - &Decoder::decode_OP_SOP2__S_ASHR_I32, - &Decoder::decode_OP_SOP2__S_ASHR_I64, - &Decoder::decode_OP_SOP2__S_BFM_B32, - &Decoder::decode_OP_SOP2__S_BFM_B64, - &Decoder::decode_OP_SOP2__S_MUL_I32, - &Decoder::decode_OP_SOP2__S_BFE_U32, - &Decoder::decode_OP_SOP2__S_BFE_I32, - &Decoder::decode_OP_SOP2__S_BFE_U64, - &Decoder::decode_OP_SOP2__S_BFE_I64, - &Decoder::decode_OP_SOP2__S_CBRANCH_G_FORK, - &Decoder::decode_OP_SOP2__S_ABSDIFF_I32, - &Decoder::decode_OP_SOP2__S_RFE_RESTORE_B64, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_SOPK__S_MOVK_I32, - &Decoder::decode_OP_SOPK__S_CMOVK_I32, - &Decoder::decode_OP_SOPK__S_CMPK_EQ_I32, - &Decoder::decode_OP_SOPK__S_CMPK_LG_I32, - &Decoder::decode_OP_SOPK__S_CMPK_GT_I32, - &Decoder::decode_OP_SOPK__S_CMPK_GE_I32, - &Decoder::decode_OP_SOPK__S_CMPK_LT_I32, - &Decoder::decode_OP_SOPK__S_CMPK_LE_I32, - &Decoder::decode_OP_SOPK__S_CMPK_EQ_U32, - &Decoder::decode_OP_SOPK__S_CMPK_LG_U32, - &Decoder::decode_OP_SOPK__S_CMPK_GT_U32, - &Decoder::decode_OP_SOPK__S_CMPK_GE_U32, - &Decoder::decode_OP_SOPK__S_CMPK_LT_U32, - &Decoder::decode_OP_SOPK__S_CMPK_LE_U32, - &Decoder::decode_OP_SOPK__S_ADDK_I32, - &Decoder::decode_OP_SOPK__S_MULK_I32, - &Decoder::decode_OP_SOPK__S_CBRANCH_I_FORK, - &Decoder::decode_OP_SOPK__S_GETREG_B32, - &Decoder::decode_OP_SOPK__S_SETREG_B32, - &Decoder::decode_invalid, - &Decoder::decode_OP_SOPK__S_SETREG_IMM32_B32, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::subDecode_OP_SOP1, - &Decoder::subDecode_OP_SOPC, - &Decoder::subDecode_OP_SOPP, - &Decoder::subDecode_OP_SMEM, - &Decoder::subDecode_OP_SMEM, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_EXP, - &Decoder::decode_OP_EXP, - &Decoder::decode_OP_EXP, - &Decoder::decode_OP_EXP, - &Decoder::decode_OP_EXP, - &Decoder::decode_OP_EXP, - &Decoder::decode_OP_EXP, - &Decoder::decode_OP_EXP, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::subDecode_OPU_VOP3, - &Decoder::subDecode_OPU_VOP3, - &Decoder::subDecode_OPU_VOP3, - &Decoder::subDecode_OPU_VOP3, - &Decoder::subDecode_OPU_VOP3, - &Decoder::subDecode_OPU_VOP3, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::subDecode_OP_VINTRP, - &Decoder::subDecode_OP_VINTRP, - &Decoder::subDecode_OP_VINTRP, - &Decoder::subDecode_OP_VINTRP, - &Decoder::subDecode_OP_VINTRP, - &Decoder::subDecode_OP_VINTRP, - &Decoder::subDecode_OP_VINTRP, - &Decoder::subDecode_OP_VINTRP, - &Decoder::subDecode_OP_DS, - &Decoder::subDecode_OP_DS, - &Decoder::subDecode_OP_DS, - &Decoder::subDecode_OP_DS, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::subDecode_OP_FLAT, - &Decoder::decode_invalid, - &Decoder::subDecode_OP_FLAT, - &Decoder::subDecode_OP_FLAT, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::subDecode_OP_MUBUF, - &Decoder::subDecode_OP_MUBUF, - &Decoder::subDecode_OP_MUBUF, - &Decoder::subDecode_OP_MUBUF, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::subDecode_OP_MTBUF, - &Decoder::subDecode_OP_MTBUF, - &Decoder::subDecode_OP_MTBUF, - &Decoder::subDecode_OP_MTBUF, - 
&Decoder::subDecode_OP_MTBUF, - &Decoder::subDecode_OP_MTBUF, - &Decoder::subDecode_OP_MTBUF, - &Decoder::subDecode_OP_MTBUF, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::subDecode_OP_MIMG, - &Decoder::subDecode_OP_MIMG, - &Decoder::subDecode_OP_MIMG, - &Decoder::subDecode_OP_MIMG, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid - }; - - IsaDecodeMethod Decoder::tableSubDecode_OPU_VOP3[] = { - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OPU_VOP3__V_CMP_CLASS_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_CLASS_F32, - &Decoder::decode_OPU_VOP3__V_CMP_CLASS_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_CLASS_F64, - 
&Decoder::decode_OPU_VOP3__V_CMP_CLASS_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_CLASS_F16, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OPU_VOP3__V_CMP_F_F16, - &Decoder::decode_OPU_VOP3__V_CMP_LT_F16, - &Decoder::decode_OPU_VOP3__V_CMP_EQ_F16, - &Decoder::decode_OPU_VOP3__V_CMP_LE_F16, - &Decoder::decode_OPU_VOP3__V_CMP_GT_F16, - &Decoder::decode_OPU_VOP3__V_CMP_LG_F16, - &Decoder::decode_OPU_VOP3__V_CMP_GE_F16, - &Decoder::decode_OPU_VOP3__V_CMP_O_F16, - &Decoder::decode_OPU_VOP3__V_CMP_U_F16, - &Decoder::decode_OPU_VOP3__V_CMP_NGE_F16, - &Decoder::decode_OPU_VOP3__V_CMP_NLG_F16, - &Decoder::decode_OPU_VOP3__V_CMP_NGT_F16, - &Decoder::decode_OPU_VOP3__V_CMP_NLE_F16, - &Decoder::decode_OPU_VOP3__V_CMP_NEQ_F16, - &Decoder::decode_OPU_VOP3__V_CMP_NLT_F16, - &Decoder::decode_OPU_VOP3__V_CMP_TRU_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_F_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_LT_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_EQ_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_LE_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_GT_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_LG_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_GE_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_O_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_U_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_NGE_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_NLG_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_NGT_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_NLE_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_NEQ_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_NLT_F16, - &Decoder::decode_OPU_VOP3__V_CMPX_TRU_F16, - &Decoder::decode_OPU_VOP3__V_CMP_F_F32, - &Decoder::decode_OPU_VOP3__V_CMP_LT_F32, - &Decoder::decode_OPU_VOP3__V_CMP_EQ_F32, - &Decoder::decode_OPU_VOP3__V_CMP_LE_F32, - &Decoder::decode_OPU_VOP3__V_CMP_GT_F32, - 
&Decoder::decode_OPU_VOP3__V_CMP_LG_F32, - &Decoder::decode_OPU_VOP3__V_CMP_GE_F32, - &Decoder::decode_OPU_VOP3__V_CMP_O_F32, - &Decoder::decode_OPU_VOP3__V_CMP_U_F32, - &Decoder::decode_OPU_VOP3__V_CMP_NGE_F32, - &Decoder::decode_OPU_VOP3__V_CMP_NLG_F32, - &Decoder::decode_OPU_VOP3__V_CMP_NGT_F32, - &Decoder::decode_OPU_VOP3__V_CMP_NLE_F32, - &Decoder::decode_OPU_VOP3__V_CMP_NEQ_F32, - &Decoder::decode_OPU_VOP3__V_CMP_NLT_F32, - &Decoder::decode_OPU_VOP3__V_CMP_TRU_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_F_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_LT_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_EQ_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_LE_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_GT_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_LG_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_GE_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_O_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_U_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_NGE_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_NLG_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_NGT_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_NLE_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_NEQ_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_NLT_F32, - &Decoder::decode_OPU_VOP3__V_CMPX_TRU_F32, - &Decoder::decode_OPU_VOP3__V_CMP_F_F64, - &Decoder::decode_OPU_VOP3__V_CMP_LT_F64, - &Decoder::decode_OPU_VOP3__V_CMP_EQ_F64, - &Decoder::decode_OPU_VOP3__V_CMP_LE_F64, - &Decoder::decode_OPU_VOP3__V_CMP_GT_F64, - &Decoder::decode_OPU_VOP3__V_CMP_LG_F64, - &Decoder::decode_OPU_VOP3__V_CMP_GE_F64, - &Decoder::decode_OPU_VOP3__V_CMP_O_F64, - &Decoder::decode_OPU_VOP3__V_CMP_U_F64, - &Decoder::decode_OPU_VOP3__V_CMP_NGE_F64, - &Decoder::decode_OPU_VOP3__V_CMP_NLG_F64, - &Decoder::decode_OPU_VOP3__V_CMP_NGT_F64, - &Decoder::decode_OPU_VOP3__V_CMP_NLE_F64, - &Decoder::decode_OPU_VOP3__V_CMP_NEQ_F64, - &Decoder::decode_OPU_VOP3__V_CMP_NLT_F64, - &Decoder::decode_OPU_VOP3__V_CMP_TRU_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_F_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_LT_F64, - 
&Decoder::decode_OPU_VOP3__V_CMPX_EQ_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_LE_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_GT_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_LG_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_GE_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_O_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_U_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_NGE_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_NLG_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_NGT_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_NLE_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_NEQ_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_NLT_F64, - &Decoder::decode_OPU_VOP3__V_CMPX_TRU_F64, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OPU_VOP3__V_CMP_F_I16, - &Decoder::decode_OPU_VOP3__V_CMP_LT_I16, - &Decoder::decode_OPU_VOP3__V_CMP_EQ_I16, - &Decoder::decode_OPU_VOP3__V_CMP_LE_I16, - &Decoder::decode_OPU_VOP3__V_CMP_GT_I16, - &Decoder::decode_OPU_VOP3__V_CMP_NE_I16, - &Decoder::decode_OPU_VOP3__V_CMP_GE_I16, - &Decoder::decode_OPU_VOP3__V_CMP_T_I16, - &Decoder::decode_OPU_VOP3__V_CMP_F_U16, - &Decoder::decode_OPU_VOP3__V_CMP_LT_U16, - &Decoder::decode_OPU_VOP3__V_CMP_EQ_U16, - 
&Decoder::decode_OPU_VOP3__V_CMP_LE_U16, - &Decoder::decode_OPU_VOP3__V_CMP_GT_U16, - &Decoder::decode_OPU_VOP3__V_CMP_NE_U16, - &Decoder::decode_OPU_VOP3__V_CMP_GE_U16, - &Decoder::decode_OPU_VOP3__V_CMP_T_U16, - &Decoder::decode_OPU_VOP3__V_CMPX_F_I16, - &Decoder::decode_OPU_VOP3__V_CMPX_LT_I16, - &Decoder::decode_OPU_VOP3__V_CMPX_EQ_I16, - &Decoder::decode_OPU_VOP3__V_CMPX_LE_I16, - &Decoder::decode_OPU_VOP3__V_CMPX_GT_I16, - &Decoder::decode_OPU_VOP3__V_CMPX_NE_I16, - &Decoder::decode_OPU_VOP3__V_CMPX_GE_I16, - &Decoder::decode_OPU_VOP3__V_CMPX_T_I16, - &Decoder::decode_OPU_VOP3__V_CMPX_F_U16, - &Decoder::decode_OPU_VOP3__V_CMPX_LT_U16, - &Decoder::decode_OPU_VOP3__V_CMPX_EQ_U16, - &Decoder::decode_OPU_VOP3__V_CMPX_LE_U16, - &Decoder::decode_OPU_VOP3__V_CMPX_GT_U16, - &Decoder::decode_OPU_VOP3__V_CMPX_NE_U16, - &Decoder::decode_OPU_VOP3__V_CMPX_GE_U16, - &Decoder::decode_OPU_VOP3__V_CMPX_T_U16, - &Decoder::decode_OPU_VOP3__V_CMP_F_I32, - &Decoder::decode_OPU_VOP3__V_CMP_LT_I32, - &Decoder::decode_OPU_VOP3__V_CMP_EQ_I32, - &Decoder::decode_OPU_VOP3__V_CMP_LE_I32, - &Decoder::decode_OPU_VOP3__V_CMP_GT_I32, - &Decoder::decode_OPU_VOP3__V_CMP_NE_I32, - &Decoder::decode_OPU_VOP3__V_CMP_GE_I32, - &Decoder::decode_OPU_VOP3__V_CMP_T_I32, - &Decoder::decode_OPU_VOP3__V_CMP_F_U32, - &Decoder::decode_OPU_VOP3__V_CMP_LT_U32, - &Decoder::decode_OPU_VOP3__V_CMP_EQ_U32, - &Decoder::decode_OPU_VOP3__V_CMP_LE_U32, - &Decoder::decode_OPU_VOP3__V_CMP_GT_U32, - &Decoder::decode_OPU_VOP3__V_CMP_NE_U32, - &Decoder::decode_OPU_VOP3__V_CMP_GE_U32, - &Decoder::decode_OPU_VOP3__V_CMP_T_U32, - &Decoder::decode_OPU_VOP3__V_CMPX_F_I32, - &Decoder::decode_OPU_VOP3__V_CMPX_LT_I32, - &Decoder::decode_OPU_VOP3__V_CMPX_EQ_I32, - &Decoder::decode_OPU_VOP3__V_CMPX_LE_I32, - &Decoder::decode_OPU_VOP3__V_CMPX_GT_I32, - &Decoder::decode_OPU_VOP3__V_CMPX_NE_I32, - &Decoder::decode_OPU_VOP3__V_CMPX_GE_I32, - &Decoder::decode_OPU_VOP3__V_CMPX_T_I32, - &Decoder::decode_OPU_VOP3__V_CMPX_F_U32, - 
&Decoder::decode_OPU_VOP3__V_CMPX_LT_U32, - &Decoder::decode_OPU_VOP3__V_CMPX_EQ_U32, - &Decoder::decode_OPU_VOP3__V_CMPX_LE_U32, - &Decoder::decode_OPU_VOP3__V_CMPX_GT_U32, - &Decoder::decode_OPU_VOP3__V_CMPX_NE_U32, - &Decoder::decode_OPU_VOP3__V_CMPX_GE_U32, - &Decoder::decode_OPU_VOP3__V_CMPX_T_U32, - &Decoder::decode_OPU_VOP3__V_CMP_F_I64, - &Decoder::decode_OPU_VOP3__V_CMP_LT_I64, - &Decoder::decode_OPU_VOP3__V_CMP_EQ_I64, - &Decoder::decode_OPU_VOP3__V_CMP_LE_I64, - &Decoder::decode_OPU_VOP3__V_CMP_GT_I64, - &Decoder::decode_OPU_VOP3__V_CMP_NE_I64, - &Decoder::decode_OPU_VOP3__V_CMP_GE_I64, - &Decoder::decode_OPU_VOP3__V_CMP_T_I64, - &Decoder::decode_OPU_VOP3__V_CMP_F_U64, - &Decoder::decode_OPU_VOP3__V_CMP_LT_U64, - &Decoder::decode_OPU_VOP3__V_CMP_EQ_U64, - &Decoder::decode_OPU_VOP3__V_CMP_LE_U64, - &Decoder::decode_OPU_VOP3__V_CMP_GT_U64, - &Decoder::decode_OPU_VOP3__V_CMP_NE_U64, - &Decoder::decode_OPU_VOP3__V_CMP_GE_U64, - &Decoder::decode_OPU_VOP3__V_CMP_T_U64, - &Decoder::decode_OPU_VOP3__V_CMPX_F_I64, - &Decoder::decode_OPU_VOP3__V_CMPX_LT_I64, - &Decoder::decode_OPU_VOP3__V_CMPX_EQ_I64, - &Decoder::decode_OPU_VOP3__V_CMPX_LE_I64, - &Decoder::decode_OPU_VOP3__V_CMPX_GT_I64, - &Decoder::decode_OPU_VOP3__V_CMPX_NE_I64, - &Decoder::decode_OPU_VOP3__V_CMPX_GE_I64, - &Decoder::decode_OPU_VOP3__V_CMPX_T_I64, - &Decoder::decode_OPU_VOP3__V_CMPX_F_U64, - &Decoder::decode_OPU_VOP3__V_CMPX_LT_U64, - &Decoder::decode_OPU_VOP3__V_CMPX_EQ_U64, - &Decoder::decode_OPU_VOP3__V_CMPX_LE_U64, - &Decoder::decode_OPU_VOP3__V_CMPX_GT_U64, - &Decoder::decode_OPU_VOP3__V_CMPX_NE_U64, - &Decoder::decode_OPU_VOP3__V_CMPX_GE_U64, - &Decoder::decode_OPU_VOP3__V_CMPX_T_U64, - &Decoder::decode_OPU_VOP3__V_CNDMASK_B32, - &Decoder::decode_OPU_VOP3__V_ADD_F32, - &Decoder::decode_OPU_VOP3__V_SUB_F32, - &Decoder::decode_OPU_VOP3__V_SUBREV_F32, - &Decoder::decode_OPU_VOP3__V_MUL_LEGACY_F32, - &Decoder::decode_OPU_VOP3__V_MUL_F32, - &Decoder::decode_OPU_VOP3__V_MUL_I32_I24, - 
&Decoder::decode_OPU_VOP3__V_MUL_HI_I32_I24, - &Decoder::decode_OPU_VOP3__V_MUL_U32_U24, - &Decoder::decode_OPU_VOP3__V_MUL_HI_U32_U24, - &Decoder::decode_OPU_VOP3__V_MIN_F32, - &Decoder::decode_OPU_VOP3__V_MAX_F32, - &Decoder::decode_OPU_VOP3__V_MIN_I32, - &Decoder::decode_OPU_VOP3__V_MAX_I32, - &Decoder::decode_OPU_VOP3__V_MIN_U32, - &Decoder::decode_OPU_VOP3__V_MAX_U32, - &Decoder::decode_OPU_VOP3__V_LSHRREV_B32, - &Decoder::decode_OPU_VOP3__V_ASHRREV_I32, - &Decoder::decode_OPU_VOP3__V_LSHLREV_B32, - &Decoder::decode_OPU_VOP3__V_AND_B32, - &Decoder::decode_OPU_VOP3__V_OR_B32, - &Decoder::decode_OPU_VOP3__V_XOR_B32, - &Decoder::decode_OPU_VOP3__V_MAC_F32, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OPU_VOP3__V_ADD_U32, - &Decoder::decode_OPU_VOP3__V_SUB_U32, - &Decoder::decode_OPU_VOP3__V_SUBREV_U32, - &Decoder::decode_OPU_VOP3__V_ADDC_U32, - &Decoder::decode_OPU_VOP3__V_SUBB_U32, - &Decoder::decode_OPU_VOP3__V_SUBBREV_U32, - &Decoder::decode_OPU_VOP3__V_ADD_F16, - &Decoder::decode_OPU_VOP3__V_SUB_F16, - &Decoder::decode_OPU_VOP3__V_SUBREV_F16, - &Decoder::decode_OPU_VOP3__V_MUL_F16, - &Decoder::decode_OPU_VOP3__V_MAC_F16, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OPU_VOP3__V_ADD_U16, - &Decoder::decode_OPU_VOP3__V_SUB_U16, - &Decoder::decode_OPU_VOP3__V_SUBREV_U16, - &Decoder::decode_OPU_VOP3__V_MUL_LO_U16, - &Decoder::decode_OPU_VOP3__V_LSHLREV_B16, - &Decoder::decode_OPU_VOP3__V_LSHRREV_B16, - &Decoder::decode_OPU_VOP3__V_ASHRREV_I16, - &Decoder::decode_OPU_VOP3__V_MAX_F16, - &Decoder::decode_OPU_VOP3__V_MIN_F16, - &Decoder::decode_OPU_VOP3__V_MAX_U16, - &Decoder::decode_OPU_VOP3__V_MAX_I16, - &Decoder::decode_OPU_VOP3__V_MIN_U16, - &Decoder::decode_OPU_VOP3__V_MIN_I16, - &Decoder::decode_OPU_VOP3__V_LDEXP_F16, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OPU_VOP3__V_NOP, - &Decoder::decode_OPU_VOP3__V_MOV_B32, - &Decoder::decode_invalid, - &Decoder::decode_OPU_VOP3__V_CVT_I32_F64, - &Decoder::decode_OPU_VOP3__V_CVT_F64_I32, - &Decoder::decode_OPU_VOP3__V_CVT_F32_I32, - &Decoder::decode_OPU_VOP3__V_CVT_F32_U32, - &Decoder::decode_OPU_VOP3__V_CVT_U32_F32, - &Decoder::decode_OPU_VOP3__V_CVT_I32_F32, - &Decoder::decode_OPU_VOP3__V_MOV_FED_B32, - &Decoder::decode_OPU_VOP3__V_CVT_F16_F32, - &Decoder::decode_OPU_VOP3__V_CVT_F32_F16, - &Decoder::decode_OPU_VOP3__V_CVT_RPI_I32_F32, - &Decoder::decode_OPU_VOP3__V_CVT_FLR_I32_F32, - &Decoder::decode_OPU_VOP3__V_CVT_OFF_F32_I4, - &Decoder::decode_OPU_VOP3__V_CVT_F32_F64, - &Decoder::decode_OPU_VOP3__V_CVT_F64_F32, - &Decoder::decode_OPU_VOP3__V_CVT_F32_UBYTE0, - &Decoder::decode_OPU_VOP3__V_CVT_F32_UBYTE1, - &Decoder::decode_OPU_VOP3__V_CVT_F32_UBYTE2, - &Decoder::decode_OPU_VOP3__V_CVT_F32_UBYTE3, - &Decoder::decode_OPU_VOP3__V_CVT_U32_F64, - &Decoder::decode_OPU_VOP3__V_CVT_F64_U32, - &Decoder::decode_OPU_VOP3__V_TRUNC_F64, - &Decoder::decode_OPU_VOP3__V_CEIL_F64, - &Decoder::decode_OPU_VOP3__V_RNDNE_F64, - &Decoder::decode_OPU_VOP3__V_FLOOR_F64, - &Decoder::decode_OPU_VOP3__V_FRACT_F32, - &Decoder::decode_OPU_VOP3__V_TRUNC_F32, - &Decoder::decode_OPU_VOP3__V_CEIL_F32, - &Decoder::decode_OPU_VOP3__V_RNDNE_F32, - &Decoder::decode_OPU_VOP3__V_FLOOR_F32, - &Decoder::decode_OPU_VOP3__V_EXP_F32, - &Decoder::decode_OPU_VOP3__V_LOG_F32, - &Decoder::decode_OPU_VOP3__V_RCP_F32, - &Decoder::decode_OPU_VOP3__V_RCP_IFLAG_F32, - &Decoder::decode_OPU_VOP3__V_RSQ_F32, - &Decoder::decode_OPU_VOP3__V_RCP_F64, - &Decoder::decode_OPU_VOP3__V_RSQ_F64, - &Decoder::decode_OPU_VOP3__V_SQRT_F32, - &Decoder::decode_OPU_VOP3__V_SQRT_F64, - &Decoder::decode_OPU_VOP3__V_SIN_F32, - &Decoder::decode_OPU_VOP3__V_COS_F32, - 
&Decoder::decode_OPU_VOP3__V_NOT_B32, - &Decoder::decode_OPU_VOP3__V_BFREV_B32, - &Decoder::decode_OPU_VOP3__V_FFBH_U32, - &Decoder::decode_OPU_VOP3__V_FFBL_B32, - &Decoder::decode_OPU_VOP3__V_FFBH_I32, - &Decoder::decode_OPU_VOP3__V_FREXP_EXP_I32_F64, - &Decoder::decode_OPU_VOP3__V_FREXP_MANT_F64, - &Decoder::decode_OPU_VOP3__V_FRACT_F64, - &Decoder::decode_OPU_VOP3__V_FREXP_EXP_I32_F32, - &Decoder::decode_OPU_VOP3__V_FREXP_MANT_F32, - &Decoder::decode_OPU_VOP3__V_CLREXCP, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OPU_VOP3__V_CVT_F16_U16, - &Decoder::decode_OPU_VOP3__V_CVT_F16_I16, - &Decoder::decode_OPU_VOP3__V_CVT_U16_F16, - &Decoder::decode_OPU_VOP3__V_CVT_I16_F16, - &Decoder::decode_OPU_VOP3__V_RCP_F16, - &Decoder::decode_OPU_VOP3__V_SQRT_F16, - &Decoder::decode_OPU_VOP3__V_RSQ_F16, - &Decoder::decode_OPU_VOP3__V_LOG_F16, - &Decoder::decode_OPU_VOP3__V_EXP_F16, - &Decoder::decode_OPU_VOP3__V_FREXP_MANT_F16, - &Decoder::decode_OPU_VOP3__V_FREXP_EXP_I16_F16, - &Decoder::decode_OPU_VOP3__V_FLOOR_F16, - &Decoder::decode_OPU_VOP3__V_CEIL_F16, - &Decoder::decode_OPU_VOP3__V_TRUNC_F16, - &Decoder::decode_OPU_VOP3__V_RNDNE_F16, - &Decoder::decode_OPU_VOP3__V_FRACT_F16, - &Decoder::decode_OPU_VOP3__V_SIN_F16, - &Decoder::decode_OPU_VOP3__V_COS_F16, - &Decoder::decode_OPU_VOP3__V_EXP_LEGACY_F32, - &Decoder::decode_OPU_VOP3__V_LOG_LEGACY_F32, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OPU_VOP3__V_MAD_LEGACY_F32, - &Decoder::decode_OPU_VOP3__V_MAD_F32, - &Decoder::decode_OPU_VOP3__V_MAD_I32_I24, - &Decoder::decode_OPU_VOP3__V_MAD_U32_U24, - &Decoder::decode_OPU_VOP3__V_CUBEID_F32, - &Decoder::decode_OPU_VOP3__V_CUBESC_F32, - &Decoder::decode_OPU_VOP3__V_CUBETC_F32, - &Decoder::decode_OPU_VOP3__V_CUBEMA_F32, - &Decoder::decode_OPU_VOP3__V_BFE_U32, - &Decoder::decode_OPU_VOP3__V_BFE_I32, - &Decoder::decode_OPU_VOP3__V_BFI_B32, - &Decoder::decode_OPU_VOP3__V_FMA_F32, - &Decoder::decode_OPU_VOP3__V_FMA_F64, - &Decoder::decode_OPU_VOP3__V_LERP_U8, - &Decoder::decode_OPU_VOP3__V_ALIGNBIT_B32, - &Decoder::decode_OPU_VOP3__V_ALIGNBYTE_B32, - &Decoder::decode_OPU_VOP3__V_MIN3_F32, - &Decoder::decode_OPU_VOP3__V_MIN3_I32, - &Decoder::decode_OPU_VOP3__V_MIN3_U32, - &Decoder::decode_OPU_VOP3__V_MAX3_F32, - &Decoder::decode_OPU_VOP3__V_MAX3_I32, - &Decoder::decode_OPU_VOP3__V_MAX3_U32, - &Decoder::decode_OPU_VOP3__V_MED3_F32, - &Decoder::decode_OPU_VOP3__V_MED3_I32, - &Decoder::decode_OPU_VOP3__V_MED3_U32, - &Decoder::decode_OPU_VOP3__V_SAD_U8, - &Decoder::decode_OPU_VOP3__V_SAD_HI_U8, - 
&Decoder::decode_OPU_VOP3__V_SAD_U16, - &Decoder::decode_OPU_VOP3__V_SAD_U32, - &Decoder::decode_OPU_VOP3__V_CVT_PK_U8_F32, - &Decoder::decode_OPU_VOP3__V_DIV_FIXUP_F32, - &Decoder::decode_OPU_VOP3__V_DIV_FIXUP_F64, - &Decoder::decode_OPU_VOP3__V_DIV_SCALE_F32, - &Decoder::decode_OPU_VOP3__V_DIV_SCALE_F64, - &Decoder::decode_OPU_VOP3__V_DIV_FMAS_F32, - &Decoder::decode_OPU_VOP3__V_DIV_FMAS_F64, - &Decoder::decode_OPU_VOP3__V_MSAD_U8, - &Decoder::decode_OPU_VOP3__V_QSAD_PK_U16_U8, - &Decoder::decode_OPU_VOP3__V_MQSAD_PK_U16_U8, - &Decoder::decode_OPU_VOP3__V_MQSAD_U32_U8, - &Decoder::decode_OPU_VOP3__V_MAD_U64_U32, - &Decoder::decode_OPU_VOP3__V_MAD_I64_I32, - &Decoder::decode_OPU_VOP3__V_MAD_F16, - &Decoder::decode_OPU_VOP3__V_MAD_U16, - &Decoder::decode_OPU_VOP3__V_MAD_I16, - &Decoder::decode_OPU_VOP3__V_PERM_B32, - &Decoder::decode_OPU_VOP3__V_FMA_F16, - &Decoder::decode_OPU_VOP3__V_DIV_FIXUP_F16, - &Decoder::decode_OPU_VOP3__V_CVT_PKACCUM_U8_F32, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, 
- &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OPU_VOP3__V_INTERP_P1_F32, - &Decoder::decode_OPU_VOP3__V_INTERP_P2_F32, - &Decoder::decode_OPU_VOP3__V_INTERP_MOV_F32, - &Decoder::decode_invalid, - &Decoder::decode_OPU_VOP3__V_INTERP_P1LL_F16, - &Decoder::decode_OPU_VOP3__V_INTERP_P1LV_F16, - &Decoder::decode_OPU_VOP3__V_INTERP_P2_F16, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OPU_VOP3__V_ADD_F64, - &Decoder::decode_OPU_VOP3__V_MUL_F64, - &Decoder::decode_OPU_VOP3__V_MIN_F64, - &Decoder::decode_OPU_VOP3__V_MAX_F64, - &Decoder::decode_OPU_VOP3__V_LDEXP_F64, - &Decoder::decode_OPU_VOP3__V_MUL_LO_U32, - &Decoder::decode_OPU_VOP3__V_MUL_HI_U32, - &Decoder::decode_OPU_VOP3__V_MUL_HI_I32, - &Decoder::decode_OPU_VOP3__V_LDEXP_F32, - &Decoder::decode_OPU_VOP3__V_READLANE_B32, - &Decoder::decode_OPU_VOP3__V_WRITELANE_B32, - &Decoder::decode_OPU_VOP3__V_BCNT_U32_B32, - &Decoder::decode_OPU_VOP3__V_MBCNT_LO_U32_B32, - &Decoder::decode_OPU_VOP3__V_MBCNT_HI_U32_B32, - &Decoder::decode_invalid, - &Decoder::decode_OPU_VOP3__V_LSHLREV_B64, - &Decoder::decode_OPU_VOP3__V_LSHRREV_B64, - &Decoder::decode_OPU_VOP3__V_ASHRREV_I64, - &Decoder::decode_OPU_VOP3__V_TRIG_PREOP_F64, - &Decoder::decode_OPU_VOP3__V_BFM_B32, - &Decoder::decode_OPU_VOP3__V_CVT_PKNORM_I16_F32, - 
&Decoder::decode_OPU_VOP3__V_CVT_PKNORM_U16_F32, - &Decoder::decode_OPU_VOP3__V_CVT_PKRTZ_F16_F32, - &Decoder::decode_OPU_VOP3__V_CVT_PK_U16_U32, - &Decoder::decode_OPU_VOP3__V_CVT_PK_I16_I32, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid - }; - - IsaDecodeMethod Decoder::tableSubDecode_OP_DS[] = { - &Decoder::decode_OP_DS__DS_ADD_U32, - &Decoder::decode_OP_DS__DS_SUB_U32, - &Decoder::decode_OP_DS__DS_RSUB_U32, - &Decoder::decode_OP_DS__DS_INC_U32, - &Decoder::decode_OP_DS__DS_DEC_U32, - &Decoder::decode_OP_DS__DS_MIN_I32, - &Decoder::decode_OP_DS__DS_MAX_I32, - &Decoder::decode_OP_DS__DS_MIN_U32, - &Decoder::decode_OP_DS__DS_MAX_U32, - &Decoder::decode_OP_DS__DS_AND_B32, - &Decoder::decode_OP_DS__DS_OR_B32, - &Decoder::decode_OP_DS__DS_XOR_B32, - &Decoder::decode_OP_DS__DS_MSKOR_B32, - &Decoder::decode_OP_DS__DS_WRITE_B32, - &Decoder::decode_OP_DS__DS_WRITE2_B32, - &Decoder::decode_OP_DS__DS_WRITE2ST64_B32, - &Decoder::decode_OP_DS__DS_CMPST_B32, - &Decoder::decode_OP_DS__DS_CMPST_F32, - &Decoder::decode_OP_DS__DS_MIN_F32, - &Decoder::decode_OP_DS__DS_MAX_F32, - &Decoder::decode_OP_DS__DS_NOP, - &Decoder::decode_OP_DS__DS_ADD_F32, 
- &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_DS__DS_WRITE_B8, - &Decoder::decode_OP_DS__DS_WRITE_B16, - &Decoder::decode_OP_DS__DS_ADD_RTN_U32, - &Decoder::decode_OP_DS__DS_SUB_RTN_U32, - &Decoder::decode_OP_DS__DS_RSUB_RTN_U32, - &Decoder::decode_OP_DS__DS_INC_RTN_U32, - &Decoder::decode_OP_DS__DS_DEC_RTN_U32, - &Decoder::decode_OP_DS__DS_MIN_RTN_I32, - &Decoder::decode_OP_DS__DS_MAX_RTN_I32, - &Decoder::decode_OP_DS__DS_MIN_RTN_U32, - &Decoder::decode_OP_DS__DS_MAX_RTN_U32, - &Decoder::decode_OP_DS__DS_AND_RTN_B32, - &Decoder::decode_OP_DS__DS_OR_RTN_B32, - &Decoder::decode_OP_DS__DS_XOR_RTN_B32, - &Decoder::decode_OP_DS__DS_MSKOR_RTN_B32, - &Decoder::decode_OP_DS__DS_WRXCHG_RTN_B32, - &Decoder::decode_OP_DS__DS_WRXCHG2_RTN_B32, - &Decoder::decode_OP_DS__DS_WRXCHG2ST64_RTN_B32, - &Decoder::decode_OP_DS__DS_CMPST_RTN_B32, - &Decoder::decode_OP_DS__DS_CMPST_RTN_F32, - &Decoder::decode_OP_DS__DS_MIN_RTN_F32, - &Decoder::decode_OP_DS__DS_MAX_RTN_F32, - &Decoder::decode_OP_DS__DS_WRAP_RTN_B32, - &Decoder::decode_OP_DS__DS_ADD_RTN_F32, - &Decoder::decode_OP_DS__DS_READ_B32, - &Decoder::decode_OP_DS__DS_READ2_B32, - &Decoder::decode_OP_DS__DS_READ2ST64_B32, - &Decoder::decode_OP_DS__DS_READ_I8, - &Decoder::decode_OP_DS__DS_READ_U8, - &Decoder::decode_OP_DS__DS_READ_I16, - &Decoder::decode_OP_DS__DS_READ_U16, - &Decoder::decode_OP_DS__DS_SWIZZLE_B32, - &Decoder::decode_OP_DS__DS_PERMUTE_B32, - &Decoder::decode_OP_DS__DS_BPERMUTE_B32, - &Decoder::decode_OP_DS__DS_ADD_U64, - &Decoder::decode_OP_DS__DS_SUB_U64, - &Decoder::decode_OP_DS__DS_RSUB_U64, - &Decoder::decode_OP_DS__DS_INC_U64, - &Decoder::decode_OP_DS__DS_DEC_U64, - &Decoder::decode_OP_DS__DS_MIN_I64, - &Decoder::decode_OP_DS__DS_MAX_I64, - &Decoder::decode_OP_DS__DS_MIN_U64, - &Decoder::decode_OP_DS__DS_MAX_U64, - 
&Decoder::decode_OP_DS__DS_AND_B64, - &Decoder::decode_OP_DS__DS_OR_B64, - &Decoder::decode_OP_DS__DS_XOR_B64, - &Decoder::decode_OP_DS__DS_MSKOR_B64, - &Decoder::decode_OP_DS__DS_WRITE_B64, - &Decoder::decode_OP_DS__DS_WRITE2_B64, - &Decoder::decode_OP_DS__DS_WRITE2ST64_B64, - &Decoder::decode_OP_DS__DS_CMPST_B64, - &Decoder::decode_OP_DS__DS_CMPST_F64, - &Decoder::decode_OP_DS__DS_MIN_F64, - &Decoder::decode_OP_DS__DS_MAX_F64, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_DS__DS_ADD_RTN_U64, - &Decoder::decode_OP_DS__DS_SUB_RTN_U64, - &Decoder::decode_OP_DS__DS_RSUB_RTN_U64, - &Decoder::decode_OP_DS__DS_INC_RTN_U64, - &Decoder::decode_OP_DS__DS_DEC_RTN_U64, - &Decoder::decode_OP_DS__DS_MIN_RTN_I64, - &Decoder::decode_OP_DS__DS_MAX_RTN_I64, - &Decoder::decode_OP_DS__DS_MIN_RTN_U64, - &Decoder::decode_OP_DS__DS_MAX_RTN_U64, - &Decoder::decode_OP_DS__DS_AND_RTN_B64, - &Decoder::decode_OP_DS__DS_OR_RTN_B64, - &Decoder::decode_OP_DS__DS_XOR_RTN_B64, - &Decoder::decode_OP_DS__DS_MSKOR_RTN_B64, - &Decoder::decode_OP_DS__DS_WRXCHG_RTN_B64, - &Decoder::decode_OP_DS__DS_WRXCHG2_RTN_B64, - &Decoder::decode_OP_DS__DS_WRXCHG2ST64_RTN_B64, - &Decoder::decode_OP_DS__DS_CMPST_RTN_B64, - &Decoder::decode_OP_DS__DS_CMPST_RTN_F64, - &Decoder::decode_OP_DS__DS_MIN_RTN_F64, - &Decoder::decode_OP_DS__DS_MAX_RTN_F64, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_DS__DS_READ_B64, - &Decoder::decode_OP_DS__DS_READ2_B64, - &Decoder::decode_OP_DS__DS_READ2ST64_B64, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_DS__DS_CONDXCHG32_RTN_B64, - 
&Decoder::decode_invalid, - &Decoder::decode_OP_DS__DS_ADD_SRC2_U32, - &Decoder::decode_OP_DS__DS_SUB_SRC2_U32, - &Decoder::decode_OP_DS__DS_RSUB_SRC2_U32, - &Decoder::decode_OP_DS__DS_INC_SRC2_U32, - &Decoder::decode_OP_DS__DS_DEC_SRC2_U32, - &Decoder::decode_OP_DS__DS_MIN_SRC2_I32, - &Decoder::decode_OP_DS__DS_MAX_SRC2_I32, - &Decoder::decode_OP_DS__DS_MIN_SRC2_U32, - &Decoder::decode_OP_DS__DS_MAX_SRC2_U32, - &Decoder::decode_OP_DS__DS_AND_SRC2_B32, - &Decoder::decode_OP_DS__DS_OR_SRC2_B32, - &Decoder::decode_OP_DS__DS_XOR_SRC2_B32, - &Decoder::decode_invalid, - &Decoder::decode_OP_DS__DS_WRITE_SRC2_B32, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_DS__DS_MIN_SRC2_F32, - &Decoder::decode_OP_DS__DS_MAX_SRC2_F32, - &Decoder::decode_invalid, - &Decoder::decode_OP_DS__DS_ADD_SRC2_F32, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_DS__DS_GWS_SEMA_RELEASE_ALL, - &Decoder::decode_OP_DS__DS_GWS_INIT, - &Decoder::decode_OP_DS__DS_GWS_SEMA_V, - &Decoder::decode_OP_DS__DS_GWS_SEMA_BR, - &Decoder::decode_OP_DS__DS_GWS_SEMA_P, - &Decoder::decode_OP_DS__DS_GWS_BARRIER, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_DS__DS_CONSUME, - &Decoder::decode_OP_DS__DS_APPEND, - &Decoder::decode_OP_DS__DS_ORDERED_COUNT, - &Decoder::decode_OP_DS__DS_ADD_SRC2_U64, - &Decoder::decode_OP_DS__DS_SUB_SRC2_U64, - &Decoder::decode_OP_DS__DS_RSUB_SRC2_U64, - &Decoder::decode_OP_DS__DS_INC_SRC2_U64, - &Decoder::decode_OP_DS__DS_DEC_SRC2_U64, - &Decoder::decode_OP_DS__DS_MIN_SRC2_I64, - &Decoder::decode_OP_DS__DS_MAX_SRC2_I64, - &Decoder::decode_OP_DS__DS_MIN_SRC2_U64, - &Decoder::decode_OP_DS__DS_MAX_SRC2_U64, - &Decoder::decode_OP_DS__DS_AND_SRC2_B64, - &Decoder::decode_OP_DS__DS_OR_SRC2_B64, - &Decoder::decode_OP_DS__DS_XOR_SRC2_B64, - &Decoder::decode_invalid, - &Decoder::decode_OP_DS__DS_WRITE_SRC2_B64, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_DS__DS_MIN_SRC2_F64, - &Decoder::decode_OP_DS__DS_MAX_SRC2_F64, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_DS__DS_WRITE_B96, - &Decoder::decode_OP_DS__DS_WRITE_B128, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_DS__DS_READ_B96, - &Decoder::decode_OP_DS__DS_READ_B128 - }; - - IsaDecodeMethod Decoder::tableSubDecode_OP_FLAT[] = { - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_FLAT__FLAT_LOAD_UBYTE, - &Decoder::decode_OP_FLAT__FLAT_LOAD_SBYTE, - &Decoder::decode_OP_FLAT__FLAT_LOAD_USHORT, - &Decoder::decode_OP_FLAT__FLAT_LOAD_SSHORT, - &Decoder::decode_OP_FLAT__FLAT_LOAD_DWORD, - &Decoder::decode_OP_FLAT__FLAT_LOAD_DWORDX2, - &Decoder::decode_OP_FLAT__FLAT_LOAD_DWORDX3, - &Decoder::decode_OP_FLAT__FLAT_LOAD_DWORDX4, - &Decoder::decode_OP_FLAT__FLAT_STORE_BYTE, - &Decoder::decode_invalid, - &Decoder::decode_OP_FLAT__FLAT_STORE_SHORT, - &Decoder::decode_invalid, - &Decoder::decode_OP_FLAT__FLAT_STORE_DWORD, - &Decoder::decode_OP_FLAT__FLAT_STORE_DWORDX2, - &Decoder::decode_OP_FLAT__FLAT_STORE_DWORDX3, - &Decoder::decode_OP_FLAT__FLAT_STORE_DWORDX4, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_SWAP, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_CMPSWAP, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_ADD, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_SUB, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_SMIN, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_UMIN, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_SMAX, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_UMAX, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_AND, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_OR, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_XOR, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_INC, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_DEC, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_SWAP_X2, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_CMPSWAP_X2, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_ADD_X2, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_SUB_X2, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_SMIN_X2, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_UMIN_X2, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_SMAX_X2, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_UMAX_X2, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_AND_X2, - 
&Decoder::decode_OP_FLAT__FLAT_ATOMIC_OR_X2, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_XOR_X2, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_INC_X2, - &Decoder::decode_OP_FLAT__FLAT_ATOMIC_DEC_X2, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid - }; - - IsaDecodeMethod Decoder::tableSubDecode_OP_MIMG[] = { - &Decoder::decode_OP_MIMG__IMAGE_LOAD, - &Decoder::decode_OP_MIMG__IMAGE_LOAD_MIP, - &Decoder::decode_OP_MIMG__IMAGE_LOAD_PCK, - &Decoder::decode_OP_MIMG__IMAGE_LOAD_PCK_SGN, - &Decoder::decode_OP_MIMG__IMAGE_LOAD_MIP_PCK, - &Decoder::decode_OP_MIMG__IMAGE_LOAD_MIP_PCK_SGN, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_MIMG__IMAGE_STORE, - &Decoder::decode_OP_MIMG__IMAGE_STORE_MIP, - &Decoder::decode_OP_MIMG__IMAGE_STORE_PCK, - &Decoder::decode_OP_MIMG__IMAGE_STORE_MIP_PCK, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_MIMG__IMAGE_GET_RESINFO, - &Decoder::decode_invalid, - &Decoder::decode_OP_MIMG__IMAGE_ATOMIC_SWAP, - &Decoder::decode_OP_MIMG__IMAGE_ATOMIC_CMPSWAP, - &Decoder::decode_OP_MIMG__IMAGE_ATOMIC_ADD, - &Decoder::decode_OP_MIMG__IMAGE_ATOMIC_SUB, - &Decoder::decode_OP_MIMG__IMAGE_ATOMIC_SMIN, - &Decoder::decode_OP_MIMG__IMAGE_ATOMIC_UMIN, - &Decoder::decode_OP_MIMG__IMAGE_ATOMIC_SMAX, - &Decoder::decode_OP_MIMG__IMAGE_ATOMIC_UMAX, - &Decoder::decode_OP_MIMG__IMAGE_ATOMIC_AND, - &Decoder::decode_OP_MIMG__IMAGE_ATOMIC_OR, - &Decoder::decode_OP_MIMG__IMAGE_ATOMIC_XOR, - &Decoder::decode_OP_MIMG__IMAGE_ATOMIC_INC, - 
&Decoder::decode_OP_MIMG__IMAGE_ATOMIC_DEC, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_CL, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_D, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_D_CL, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_L, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_B, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_B_CL, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_LZ, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_CL, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_D, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_D_CL, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_L, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_B, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_B_CL, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_LZ, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_CL_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_D_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_D_CL_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_L_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_B_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_B_CL_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_LZ_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_CL_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_D_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_D_CL_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_L_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_B_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_B_CL_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_LZ_O, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_CL, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_L, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_B, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_B_CL, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_LZ, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_C, - 
&Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_CL, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_L, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_B, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_B_CL, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_LZ, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_O, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_CL_O, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_L_O, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_B_O, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_B_CL_O, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_LZ_O, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_O, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_CL_O, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_L_O, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_B_O, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_B_CL_O, - &Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_LZ_O, - &Decoder::decode_OP_MIMG__IMAGE_GET_LOD, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_CD, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_CD_CL, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_CD, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_CD_CL, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_CD_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_CD_CL_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_CD_O, - &Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_CD_CL_O, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid - }; - - IsaDecodeMethod Decoder::tableSubDecode_OP_MTBUF[] = { - &Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_X, - &Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_XY, - &Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_XYZ, - &Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_XYZW, - &Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_X, - &Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_XY, - &Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_XYZ, - &Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_XYZW, - &Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_X, - &Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY, - &Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ, - &Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW, - &Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_X, - &Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_XY, - &Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ, - &Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW - }; - - IsaDecodeMethod Decoder::tableSubDecode_OP_MUBUF[] = { - &Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_X, - &Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_XY, - &Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_XYZ, - &Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_XYZW, - &Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_X, - &Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_XY, - &Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_XYZ, - &Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_XYZW, - &Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_X, - &Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_XY, - &Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ, - &Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW, - &Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_X, - &Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_XY, - &Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ, - &Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW, - 
&Decoder::decode_OP_MUBUF__BUFFER_LOAD_UBYTE, - &Decoder::decode_OP_MUBUF__BUFFER_LOAD_SBYTE, - &Decoder::decode_OP_MUBUF__BUFFER_LOAD_USHORT, - &Decoder::decode_OP_MUBUF__BUFFER_LOAD_SSHORT, - &Decoder::decode_OP_MUBUF__BUFFER_LOAD_DWORD, - &Decoder::decode_OP_MUBUF__BUFFER_LOAD_DWORDX2, - &Decoder::decode_OP_MUBUF__BUFFER_LOAD_DWORDX3, - &Decoder::decode_OP_MUBUF__BUFFER_LOAD_DWORDX4, - &Decoder::decode_OP_MUBUF__BUFFER_STORE_BYTE, - &Decoder::decode_invalid, - &Decoder::decode_OP_MUBUF__BUFFER_STORE_SHORT, - &Decoder::decode_invalid, - &Decoder::decode_OP_MUBUF__BUFFER_STORE_DWORD, - &Decoder::decode_OP_MUBUF__BUFFER_STORE_DWORDX2, - &Decoder::decode_OP_MUBUF__BUFFER_STORE_DWORDX3, - &Decoder::decode_OP_MUBUF__BUFFER_STORE_DWORDX4, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_MUBUF__BUFFER_STORE_LDS_DWORD, - &Decoder::decode_OP_MUBUF__BUFFER_WBINVL1, - &Decoder::decode_OP_MUBUF__BUFFER_WBINVL1_VOL, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SWAP, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_CMPSWAP, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_ADD, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SUB, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SMIN, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_UMIN, 
- &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SMAX, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_UMAX, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_AND, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_OR, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_XOR, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_INC, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_DEC, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SWAP_X2, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_ADD_X2, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SUB_X2, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SMIN_X2, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_UMIN_X2, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SMAX_X2, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_UMAX_X2, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_AND_X2, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_OR_X2, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_XOR_X2, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_INC_X2, - &Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_DEC_X2, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid - }; - - IsaDecodeMethod Decoder::tableSubDecode_OP_SMEM[] = { - &Decoder::decode_OP_SMEM__S_LOAD_DWORD, - &Decoder::decode_OP_SMEM__S_LOAD_DWORDX2, - &Decoder::decode_OP_SMEM__S_LOAD_DWORDX4, - &Decoder::decode_OP_SMEM__S_LOAD_DWORDX8, - &Decoder::decode_OP_SMEM__S_LOAD_DWORDX16, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_SMEM__S_BUFFER_LOAD_DWORD, - &Decoder::decode_OP_SMEM__S_BUFFER_LOAD_DWORDX2, - &Decoder::decode_OP_SMEM__S_BUFFER_LOAD_DWORDX4, - &Decoder::decode_OP_SMEM__S_BUFFER_LOAD_DWORDX8, - &Decoder::decode_OP_SMEM__S_BUFFER_LOAD_DWORDX16, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_SMEM__S_STORE_DWORD, - &Decoder::decode_OP_SMEM__S_STORE_DWORDX2, - &Decoder::decode_OP_SMEM__S_STORE_DWORDX4, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_SMEM__S_BUFFER_STORE_DWORD, - &Decoder::decode_OP_SMEM__S_BUFFER_STORE_DWORDX2, - &Decoder::decode_OP_SMEM__S_BUFFER_STORE_DWORDX4, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_SMEM__S_DCACHE_INV, - &Decoder::decode_OP_SMEM__S_DCACHE_WB, - &Decoder::decode_OP_SMEM__S_DCACHE_INV_VOL, - &Decoder::decode_OP_SMEM__S_DCACHE_WB_VOL, - &Decoder::decode_OP_SMEM__S_MEMTIME, - &Decoder::decode_OP_SMEM__S_MEMREALTIME, - &Decoder::decode_OP_SMEM__S_ATC_PROBE, - &Decoder::decode_OP_SMEM__S_ATC_PROBE_BUFFER, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid - }; - - IsaDecodeMethod Decoder::tableSubDecode_OP_SOP1[] = { - &Decoder::decode_OP_SOP1__S_MOV_B32, - &Decoder::decode_OP_SOP1__S_MOV_B64, - &Decoder::decode_OP_SOP1__S_CMOV_B32, - &Decoder::decode_OP_SOP1__S_CMOV_B64, - &Decoder::decode_OP_SOP1__S_NOT_B32, - &Decoder::decode_OP_SOP1__S_NOT_B64, - &Decoder::decode_OP_SOP1__S_WQM_B32, - &Decoder::decode_OP_SOP1__S_WQM_B64, - &Decoder::decode_OP_SOP1__S_BREV_B32, - &Decoder::decode_OP_SOP1__S_BREV_B64, - &Decoder::decode_OP_SOP1__S_BCNT0_I32_B32, - &Decoder::decode_OP_SOP1__S_BCNT0_I32_B64, - &Decoder::decode_OP_SOP1__S_BCNT1_I32_B32, - &Decoder::decode_OP_SOP1__S_BCNT1_I32_B64, - &Decoder::decode_OP_SOP1__S_FF0_I32_B32, - &Decoder::decode_OP_SOP1__S_FF0_I32_B64, - &Decoder::decode_OP_SOP1__S_FF1_I32_B32, - &Decoder::decode_OP_SOP1__S_FF1_I32_B64, - &Decoder::decode_OP_SOP1__S_FLBIT_I32_B32, - &Decoder::decode_OP_SOP1__S_FLBIT_I32_B64, - &Decoder::decode_OP_SOP1__S_FLBIT_I32, - &Decoder::decode_OP_SOP1__S_FLBIT_I32_I64, - &Decoder::decode_OP_SOP1__S_SEXT_I32_I8, - &Decoder::decode_OP_SOP1__S_SEXT_I32_I16, - &Decoder::decode_OP_SOP1__S_BITSET0_B32, - &Decoder::decode_OP_SOP1__S_BITSET0_B64, - &Decoder::decode_OP_SOP1__S_BITSET1_B32, - &Decoder::decode_OP_SOP1__S_BITSET1_B64, - &Decoder::decode_OP_SOP1__S_GETPC_B64, - &Decoder::decode_OP_SOP1__S_SETPC_B64, - &Decoder::decode_OP_SOP1__S_SWAPPC_B64, - &Decoder::decode_OP_SOP1__S_RFE_B64, - &Decoder::decode_OP_SOP1__S_AND_SAVEEXEC_B64, - &Decoder::decode_OP_SOP1__S_OR_SAVEEXEC_B64, - &Decoder::decode_OP_SOP1__S_XOR_SAVEEXEC_B64, - &Decoder::decode_OP_SOP1__S_ANDN2_SAVEEXEC_B64, - 
&Decoder::decode_OP_SOP1__S_ORN2_SAVEEXEC_B64, - &Decoder::decode_OP_SOP1__S_NAND_SAVEEXEC_B64, - &Decoder::decode_OP_SOP1__S_NOR_SAVEEXEC_B64, - &Decoder::decode_OP_SOP1__S_XNOR_SAVEEXEC_B64, - &Decoder::decode_OP_SOP1__S_QUADMASK_B32, - &Decoder::decode_OP_SOP1__S_QUADMASK_B64, - &Decoder::decode_OP_SOP1__S_MOVRELS_B32, - &Decoder::decode_OP_SOP1__S_MOVRELS_B64, - &Decoder::decode_OP_SOP1__S_MOVRELD_B32, - &Decoder::decode_OP_SOP1__S_MOVRELD_B64, - &Decoder::decode_OP_SOP1__S_CBRANCH_JOIN, - &Decoder::decode_invalid, - &Decoder::decode_OP_SOP1__S_ABS_I32, - &Decoder::decode_OP_SOP1__S_MOV_FED_B32, - &Decoder::decode_OP_SOP1__S_SET_GPR_IDX_IDX, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid - }; - - IsaDecodeMethod Decoder::tableSubDecode_OP_SOPC[] = { - &Decoder::decode_OP_SOPC__S_CMP_EQ_I32, - &Decoder::decode_OP_SOPC__S_CMP_LG_I32, - &Decoder::decode_OP_SOPC__S_CMP_GT_I32, - &Decoder::decode_OP_SOPC__S_CMP_GE_I32, - &Decoder::decode_OP_SOPC__S_CMP_LT_I32, - &Decoder::decode_OP_SOPC__S_CMP_LE_I32, - &Decoder::decode_OP_SOPC__S_CMP_EQ_U32, - &Decoder::decode_OP_SOPC__S_CMP_LG_U32, - &Decoder::decode_OP_SOPC__S_CMP_GT_U32, - &Decoder::decode_OP_SOPC__S_CMP_GE_U32, - &Decoder::decode_OP_SOPC__S_CMP_LT_U32, - &Decoder::decode_OP_SOPC__S_CMP_LE_U32, - &Decoder::decode_OP_SOPC__S_BITCMP0_B32, - &Decoder::decode_OP_SOPC__S_BITCMP1_B32, - &Decoder::decode_OP_SOPC__S_BITCMP0_B64, - &Decoder::decode_OP_SOPC__S_BITCMP1_B64, - &Decoder::decode_OP_SOPC__S_SETVSKIP, - &Decoder::decode_OP_SOPC__S_SET_GPR_IDX_ON, - &Decoder::decode_OP_SOPC__S_CMP_EQ_U64, - &Decoder::decode_OP_SOPC__S_CMP_LG_U64, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid - }; - - IsaDecodeMethod Decoder::tableSubDecode_OP_SOPP[] = { - &Decoder::decode_OP_SOPP__S_NOP, - &Decoder::decode_OP_SOPP__S_ENDPGM, - &Decoder::decode_OP_SOPP__S_BRANCH, - &Decoder::decode_OP_SOPP__S_WAKEUP, - &Decoder::decode_OP_SOPP__S_CBRANCH_SCC0, - &Decoder::decode_OP_SOPP__S_CBRANCH_SCC1, - &Decoder::decode_OP_SOPP__S_CBRANCH_VCCZ, - &Decoder::decode_OP_SOPP__S_CBRANCH_VCCNZ, - &Decoder::decode_OP_SOPP__S_CBRANCH_EXECZ, - &Decoder::decode_OP_SOPP__S_CBRANCH_EXECNZ, - &Decoder::decode_OP_SOPP__S_BARRIER, - &Decoder::decode_OP_SOPP__S_SETKILL, - &Decoder::decode_OP_SOPP__S_WAITCNT, - &Decoder::decode_OP_SOPP__S_SETHALT, - &Decoder::decode_OP_SOPP__S_SLEEP, - &Decoder::decode_OP_SOPP__S_SETPRIO, - &Decoder::decode_OP_SOPP__S_SENDMSG, - &Decoder::decode_OP_SOPP__S_SENDMSGHALT, - &Decoder::decode_OP_SOPP__S_TRAP, - &Decoder::decode_OP_SOPP__S_ICACHE_INV, - &Decoder::decode_OP_SOPP__S_INCPERFLEVEL, - &Decoder::decode_OP_SOPP__S_DECPERFLEVEL, - &Decoder::decode_OP_SOPP__S_TTRACEDATA, - &Decoder::decode_OP_SOPP__S_CBRANCH_CDBGSYS, - &Decoder::decode_OP_SOPP__S_CBRANCH_CDBGUSER, - &Decoder::decode_OP_SOPP__S_CBRANCH_CDBGSYS_OR_USER, - &Decoder::decode_OP_SOPP__S_CBRANCH_CDBGSYS_AND_USER, - &Decoder::decode_OP_SOPP__S_ENDPGM_SAVED, - &Decoder::decode_OP_SOPP__S_SET_GPR_IDX_OFF, - &Decoder::decode_OP_SOPP__S_SET_GPR_IDX_MODE, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid - }; - - IsaDecodeMethod Decoder::tableSubDecode_OP_VINTRP[] = { - &Decoder::decode_OP_VINTRP__V_INTERP_P1_F32, - &Decoder::decode_OP_VINTRP__V_INTERP_P2_F32, - &Decoder::decode_OP_VINTRP__V_INTERP_MOV_F32, - &Decoder::decode_invalid - }; - - IsaDecodeMethod Decoder::tableSubDecode_OP_VOP1[] = { - &Decoder::decode_OP_VOP1__V_NOP, - &Decoder::decode_OP_VOP1__V_MOV_B32, - &Decoder::decode_OP_VOP1__V_READFIRSTLANE_B32, - &Decoder::decode_OP_VOP1__V_CVT_I32_F64, - &Decoder::decode_OP_VOP1__V_CVT_F64_I32, - &Decoder::decode_OP_VOP1__V_CVT_F32_I32, - &Decoder::decode_OP_VOP1__V_CVT_F32_U32, - &Decoder::decode_OP_VOP1__V_CVT_U32_F32, - &Decoder::decode_OP_VOP1__V_CVT_I32_F32, - &Decoder::decode_OP_VOP1__V_MOV_FED_B32, - &Decoder::decode_OP_VOP1__V_CVT_F16_F32, - &Decoder::decode_OP_VOP1__V_CVT_F32_F16, - &Decoder::decode_OP_VOP1__V_CVT_RPI_I32_F32, - &Decoder::decode_OP_VOP1__V_CVT_FLR_I32_F32, - &Decoder::decode_OP_VOP1__V_CVT_OFF_F32_I4, - &Decoder::decode_OP_VOP1__V_CVT_F32_F64, - &Decoder::decode_OP_VOP1__V_CVT_F64_F32, - &Decoder::decode_OP_VOP1__V_CVT_F32_UBYTE0, - &Decoder::decode_OP_VOP1__V_CVT_F32_UBYTE1, - &Decoder::decode_OP_VOP1__V_CVT_F32_UBYTE2, - &Decoder::decode_OP_VOP1__V_CVT_F32_UBYTE3, - &Decoder::decode_OP_VOP1__V_CVT_U32_F64, - &Decoder::decode_OP_VOP1__V_CVT_F64_U32, - &Decoder::decode_OP_VOP1__V_TRUNC_F64, - &Decoder::decode_OP_VOP1__V_CEIL_F64, - &Decoder::decode_OP_VOP1__V_RNDNE_F64, - &Decoder::decode_OP_VOP1__V_FLOOR_F64, - &Decoder::decode_OP_VOP1__V_FRACT_F32, - 
&Decoder::decode_OP_VOP1__V_TRUNC_F32, - &Decoder::decode_OP_VOP1__V_CEIL_F32, - &Decoder::decode_OP_VOP1__V_RNDNE_F32, - &Decoder::decode_OP_VOP1__V_FLOOR_F32, - &Decoder::decode_OP_VOP1__V_EXP_F32, - &Decoder::decode_OP_VOP1__V_LOG_F32, - &Decoder::decode_OP_VOP1__V_RCP_F32, - &Decoder::decode_OP_VOP1__V_RCP_IFLAG_F32, - &Decoder::decode_OP_VOP1__V_RSQ_F32, - &Decoder::decode_OP_VOP1__V_RCP_F64, - &Decoder::decode_OP_VOP1__V_RSQ_F64, - &Decoder::decode_OP_VOP1__V_SQRT_F32, - &Decoder::decode_OP_VOP1__V_SQRT_F64, - &Decoder::decode_OP_VOP1__V_SIN_F32, - &Decoder::decode_OP_VOP1__V_COS_F32, - &Decoder::decode_OP_VOP1__V_NOT_B32, - &Decoder::decode_OP_VOP1__V_BFREV_B32, - &Decoder::decode_OP_VOP1__V_FFBH_U32, - &Decoder::decode_OP_VOP1__V_FFBL_B32, - &Decoder::decode_OP_VOP1__V_FFBH_I32, - &Decoder::decode_OP_VOP1__V_FREXP_EXP_I32_F64, - &Decoder::decode_OP_VOP1__V_FREXP_MANT_F64, - &Decoder::decode_OP_VOP1__V_FRACT_F64, - &Decoder::decode_OP_VOP1__V_FREXP_EXP_I32_F32, - &Decoder::decode_OP_VOP1__V_FREXP_MANT_F32, - &Decoder::decode_OP_VOP1__V_CLREXCP, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_VOP1__V_CVT_F16_U16, - &Decoder::decode_OP_VOP1__V_CVT_F16_I16, - &Decoder::decode_OP_VOP1__V_CVT_U16_F16, - &Decoder::decode_OP_VOP1__V_CVT_I16_F16, - &Decoder::decode_OP_VOP1__V_RCP_F16, - &Decoder::decode_OP_VOP1__V_SQRT_F16, - &Decoder::decode_OP_VOP1__V_RSQ_F16, - &Decoder::decode_OP_VOP1__V_LOG_F16, - &Decoder::decode_OP_VOP1__V_EXP_F16, - &Decoder::decode_OP_VOP1__V_FREXP_MANT_F16, - &Decoder::decode_OP_VOP1__V_FREXP_EXP_I16_F16, - &Decoder::decode_OP_VOP1__V_FLOOR_F16, - &Decoder::decode_OP_VOP1__V_CEIL_F16, - &Decoder::decode_OP_VOP1__V_TRUNC_F16, - &Decoder::decode_OP_VOP1__V_RNDNE_F16, - &Decoder::decode_OP_VOP1__V_FRACT_F16, - &Decoder::decode_OP_VOP1__V_SIN_F16, - &Decoder::decode_OP_VOP1__V_COS_F16, - &Decoder::decode_OP_VOP1__V_EXP_LEGACY_F32, - &Decoder::decode_OP_VOP1__V_LOG_LEGACY_F32, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid - }; - - IsaDecodeMethod Decoder::tableSubDecode_OP_VOPC[] = { - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_VOPC__V_CMP_CLASS_F32, - &Decoder::decode_OP_VOPC__V_CMPX_CLASS_F32, - &Decoder::decode_OP_VOPC__V_CMP_CLASS_F64, - &Decoder::decode_OP_VOPC__V_CMPX_CLASS_F64, - &Decoder::decode_OP_VOPC__V_CMP_CLASS_F16, - &Decoder::decode_OP_VOPC__V_CMPX_CLASS_F16, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_VOPC__V_CMP_F_F16, - &Decoder::decode_OP_VOPC__V_CMP_LT_F16, - &Decoder::decode_OP_VOPC__V_CMP_EQ_F16, - &Decoder::decode_OP_VOPC__V_CMP_LE_F16, - &Decoder::decode_OP_VOPC__V_CMP_GT_F16, - &Decoder::decode_OP_VOPC__V_CMP_LG_F16, - &Decoder::decode_OP_VOPC__V_CMP_GE_F16, - &Decoder::decode_OP_VOPC__V_CMP_O_F16, - &Decoder::decode_OP_VOPC__V_CMP_U_F16, - &Decoder::decode_OP_VOPC__V_CMP_NGE_F16, - &Decoder::decode_OP_VOPC__V_CMP_NLG_F16, - &Decoder::decode_OP_VOPC__V_CMP_NGT_F16, - &Decoder::decode_OP_VOPC__V_CMP_NLE_F16, - &Decoder::decode_OP_VOPC__V_CMP_NEQ_F16, - &Decoder::decode_OP_VOPC__V_CMP_NLT_F16, - &Decoder::decode_OP_VOPC__V_CMP_TRU_F16, - &Decoder::decode_OP_VOPC__V_CMPX_F_F16, - &Decoder::decode_OP_VOPC__V_CMPX_LT_F16, - &Decoder::decode_OP_VOPC__V_CMPX_EQ_F16, - &Decoder::decode_OP_VOPC__V_CMPX_LE_F16, - &Decoder::decode_OP_VOPC__V_CMPX_GT_F16, - &Decoder::decode_OP_VOPC__V_CMPX_LG_F16, - &Decoder::decode_OP_VOPC__V_CMPX_GE_F16, - &Decoder::decode_OP_VOPC__V_CMPX_O_F16, - &Decoder::decode_OP_VOPC__V_CMPX_U_F16, - &Decoder::decode_OP_VOPC__V_CMPX_NGE_F16, - &Decoder::decode_OP_VOPC__V_CMPX_NLG_F16, - &Decoder::decode_OP_VOPC__V_CMPX_NGT_F16, - &Decoder::decode_OP_VOPC__V_CMPX_NLE_F16, - &Decoder::decode_OP_VOPC__V_CMPX_NEQ_F16, - &Decoder::decode_OP_VOPC__V_CMPX_NLT_F16, - &Decoder::decode_OP_VOPC__V_CMPX_TRU_F16, - &Decoder::decode_OP_VOPC__V_CMP_F_F32, - &Decoder::decode_OP_VOPC__V_CMP_LT_F32, - &Decoder::decode_OP_VOPC__V_CMP_EQ_F32, - &Decoder::decode_OP_VOPC__V_CMP_LE_F32, - &Decoder::decode_OP_VOPC__V_CMP_GT_F32, - &Decoder::decode_OP_VOPC__V_CMP_LG_F32, - &Decoder::decode_OP_VOPC__V_CMP_GE_F32, - &Decoder::decode_OP_VOPC__V_CMP_O_F32, - &Decoder::decode_OP_VOPC__V_CMP_U_F32, - &Decoder::decode_OP_VOPC__V_CMP_NGE_F32, - &Decoder::decode_OP_VOPC__V_CMP_NLG_F32, - &Decoder::decode_OP_VOPC__V_CMP_NGT_F32, - 
&Decoder::decode_OP_VOPC__V_CMP_NLE_F32, - &Decoder::decode_OP_VOPC__V_CMP_NEQ_F32, - &Decoder::decode_OP_VOPC__V_CMP_NLT_F32, - &Decoder::decode_OP_VOPC__V_CMP_TRU_F32, - &Decoder::decode_OP_VOPC__V_CMPX_F_F32, - &Decoder::decode_OP_VOPC__V_CMPX_LT_F32, - &Decoder::decode_OP_VOPC__V_CMPX_EQ_F32, - &Decoder::decode_OP_VOPC__V_CMPX_LE_F32, - &Decoder::decode_OP_VOPC__V_CMPX_GT_F32, - &Decoder::decode_OP_VOPC__V_CMPX_LG_F32, - &Decoder::decode_OP_VOPC__V_CMPX_GE_F32, - &Decoder::decode_OP_VOPC__V_CMPX_O_F32, - &Decoder::decode_OP_VOPC__V_CMPX_U_F32, - &Decoder::decode_OP_VOPC__V_CMPX_NGE_F32, - &Decoder::decode_OP_VOPC__V_CMPX_NLG_F32, - &Decoder::decode_OP_VOPC__V_CMPX_NGT_F32, - &Decoder::decode_OP_VOPC__V_CMPX_NLE_F32, - &Decoder::decode_OP_VOPC__V_CMPX_NEQ_F32, - &Decoder::decode_OP_VOPC__V_CMPX_NLT_F32, - &Decoder::decode_OP_VOPC__V_CMPX_TRU_F32, - &Decoder::decode_OP_VOPC__V_CMP_F_F64, - &Decoder::decode_OP_VOPC__V_CMP_LT_F64, - &Decoder::decode_OP_VOPC__V_CMP_EQ_F64, - &Decoder::decode_OP_VOPC__V_CMP_LE_F64, - &Decoder::decode_OP_VOPC__V_CMP_GT_F64, - &Decoder::decode_OP_VOPC__V_CMP_LG_F64, - &Decoder::decode_OP_VOPC__V_CMP_GE_F64, - &Decoder::decode_OP_VOPC__V_CMP_O_F64, - &Decoder::decode_OP_VOPC__V_CMP_U_F64, - &Decoder::decode_OP_VOPC__V_CMP_NGE_F64, - &Decoder::decode_OP_VOPC__V_CMP_NLG_F64, - &Decoder::decode_OP_VOPC__V_CMP_NGT_F64, - &Decoder::decode_OP_VOPC__V_CMP_NLE_F64, - &Decoder::decode_OP_VOPC__V_CMP_NEQ_F64, - &Decoder::decode_OP_VOPC__V_CMP_NLT_F64, - &Decoder::decode_OP_VOPC__V_CMP_TRU_F64, - &Decoder::decode_OP_VOPC__V_CMPX_F_F64, - &Decoder::decode_OP_VOPC__V_CMPX_LT_F64, - &Decoder::decode_OP_VOPC__V_CMPX_EQ_F64, - &Decoder::decode_OP_VOPC__V_CMPX_LE_F64, - &Decoder::decode_OP_VOPC__V_CMPX_GT_F64, - &Decoder::decode_OP_VOPC__V_CMPX_LG_F64, - &Decoder::decode_OP_VOPC__V_CMPX_GE_F64, - &Decoder::decode_OP_VOPC__V_CMPX_O_F64, - &Decoder::decode_OP_VOPC__V_CMPX_U_F64, - &Decoder::decode_OP_VOPC__V_CMPX_NGE_F64, - 
&Decoder::decode_OP_VOPC__V_CMPX_NLG_F64, - &Decoder::decode_OP_VOPC__V_CMPX_NGT_F64, - &Decoder::decode_OP_VOPC__V_CMPX_NLE_F64, - &Decoder::decode_OP_VOPC__V_CMPX_NEQ_F64, - &Decoder::decode_OP_VOPC__V_CMPX_NLT_F64, - &Decoder::decode_OP_VOPC__V_CMPX_TRU_F64, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_VOPC__V_CMP_F_I16, - &Decoder::decode_OP_VOPC__V_CMP_LT_I16, - &Decoder::decode_OP_VOPC__V_CMP_EQ_I16, - &Decoder::decode_OP_VOPC__V_CMP_LE_I16, - &Decoder::decode_OP_VOPC__V_CMP_GT_I16, - &Decoder::decode_OP_VOPC__V_CMP_NE_I16, - &Decoder::decode_OP_VOPC__V_CMP_GE_I16, - &Decoder::decode_OP_VOPC__V_CMP_T_I16, - &Decoder::decode_OP_VOPC__V_CMP_F_U16, - &Decoder::decode_OP_VOPC__V_CMP_LT_U16, - &Decoder::decode_OP_VOPC__V_CMP_EQ_U16, - &Decoder::decode_OP_VOPC__V_CMP_LE_U16, - &Decoder::decode_OP_VOPC__V_CMP_GT_U16, - &Decoder::decode_OP_VOPC__V_CMP_NE_U16, - &Decoder::decode_OP_VOPC__V_CMP_GE_U16, - &Decoder::decode_OP_VOPC__V_CMP_T_U16, - &Decoder::decode_OP_VOPC__V_CMPX_F_I16, - &Decoder::decode_OP_VOPC__V_CMPX_LT_I16, - &Decoder::decode_OP_VOPC__V_CMPX_EQ_I16, - &Decoder::decode_OP_VOPC__V_CMPX_LE_I16, - 
&Decoder::decode_OP_VOPC__V_CMPX_GT_I16, - &Decoder::decode_OP_VOPC__V_CMPX_NE_I16, - &Decoder::decode_OP_VOPC__V_CMPX_GE_I16, - &Decoder::decode_OP_VOPC__V_CMPX_T_I16, - &Decoder::decode_OP_VOPC__V_CMPX_F_U16, - &Decoder::decode_OP_VOPC__V_CMPX_LT_U16, - &Decoder::decode_OP_VOPC__V_CMPX_EQ_U16, - &Decoder::decode_OP_VOPC__V_CMPX_LE_U16, - &Decoder::decode_OP_VOPC__V_CMPX_GT_U16, - &Decoder::decode_OP_VOPC__V_CMPX_NE_U16, - &Decoder::decode_OP_VOPC__V_CMPX_GE_U16, - &Decoder::decode_OP_VOPC__V_CMPX_T_U16, - &Decoder::decode_OP_VOPC__V_CMP_F_I32, - &Decoder::decode_OP_VOPC__V_CMP_LT_I32, - &Decoder::decode_OP_VOPC__V_CMP_EQ_I32, - &Decoder::decode_OP_VOPC__V_CMP_LE_I32, - &Decoder::decode_OP_VOPC__V_CMP_GT_I32, - &Decoder::decode_OP_VOPC__V_CMP_NE_I32, - &Decoder::decode_OP_VOPC__V_CMP_GE_I32, - &Decoder::decode_OP_VOPC__V_CMP_T_I32, - &Decoder::decode_OP_VOPC__V_CMP_F_U32, - &Decoder::decode_OP_VOPC__V_CMP_LT_U32, - &Decoder::decode_OP_VOPC__V_CMP_EQ_U32, - &Decoder::decode_OP_VOPC__V_CMP_LE_U32, - &Decoder::decode_OP_VOPC__V_CMP_GT_U32, - &Decoder::decode_OP_VOPC__V_CMP_NE_U32, - &Decoder::decode_OP_VOPC__V_CMP_GE_U32, - &Decoder::decode_OP_VOPC__V_CMP_T_U32, - &Decoder::decode_OP_VOPC__V_CMPX_F_I32, - &Decoder::decode_OP_VOPC__V_CMPX_LT_I32, - &Decoder::decode_OP_VOPC__V_CMPX_EQ_I32, - &Decoder::decode_OP_VOPC__V_CMPX_LE_I32, - &Decoder::decode_OP_VOPC__V_CMPX_GT_I32, - &Decoder::decode_OP_VOPC__V_CMPX_NE_I32, - &Decoder::decode_OP_VOPC__V_CMPX_GE_I32, - &Decoder::decode_OP_VOPC__V_CMPX_T_I32, - &Decoder::decode_OP_VOPC__V_CMPX_F_U32, - &Decoder::decode_OP_VOPC__V_CMPX_LT_U32, - &Decoder::decode_OP_VOPC__V_CMPX_EQ_U32, - &Decoder::decode_OP_VOPC__V_CMPX_LE_U32, - &Decoder::decode_OP_VOPC__V_CMPX_GT_U32, - &Decoder::decode_OP_VOPC__V_CMPX_NE_U32, - &Decoder::decode_OP_VOPC__V_CMPX_GE_U32, - &Decoder::decode_OP_VOPC__V_CMPX_T_U32, - &Decoder::decode_OP_VOPC__V_CMP_F_I64, - &Decoder::decode_OP_VOPC__V_CMP_LT_I64, - &Decoder::decode_OP_VOPC__V_CMP_EQ_I64, - 
&Decoder::decode_OP_VOPC__V_CMP_LE_I64, - &Decoder::decode_OP_VOPC__V_CMP_GT_I64, - &Decoder::decode_OP_VOPC__V_CMP_NE_I64, - &Decoder::decode_OP_VOPC__V_CMP_GE_I64, - &Decoder::decode_OP_VOPC__V_CMP_T_I64, - &Decoder::decode_OP_VOPC__V_CMP_F_U64, - &Decoder::decode_OP_VOPC__V_CMP_LT_U64, - &Decoder::decode_OP_VOPC__V_CMP_EQ_U64, - &Decoder::decode_OP_VOPC__V_CMP_LE_U64, - &Decoder::decode_OP_VOPC__V_CMP_GT_U64, - &Decoder::decode_OP_VOPC__V_CMP_NE_U64, - &Decoder::decode_OP_VOPC__V_CMP_GE_U64, - &Decoder::decode_OP_VOPC__V_CMP_T_U64, - &Decoder::decode_OP_VOPC__V_CMPX_F_I64, - &Decoder::decode_OP_VOPC__V_CMPX_LT_I64, - &Decoder::decode_OP_VOPC__V_CMPX_EQ_I64, - &Decoder::decode_OP_VOPC__V_CMPX_LE_I64, - &Decoder::decode_OP_VOPC__V_CMPX_GT_I64, - &Decoder::decode_OP_VOPC__V_CMPX_NE_I64, - &Decoder::decode_OP_VOPC__V_CMPX_GE_I64, - &Decoder::decode_OP_VOPC__V_CMPX_T_I64, - &Decoder::decode_OP_VOPC__V_CMPX_F_U64, - &Decoder::decode_OP_VOPC__V_CMPX_LT_U64, - &Decoder::decode_OP_VOPC__V_CMPX_EQ_U64, - &Decoder::decode_OP_VOPC__V_CMPX_LE_U64, - &Decoder::decode_OP_VOPC__V_CMPX_GT_U64, - &Decoder::decode_OP_VOPC__V_CMPX_NE_U64, - &Decoder::decode_OP_VOPC__V_CMPX_GE_U64, - &Decoder::decode_OP_VOPC__V_CMPX_T_U64, - }; - - GPUStaticInst* - Decoder::decode(MachInst mach_inst) - { - InFmt_SOP1 *enc = &mach_inst->iFmt_SOP1; - IsaDecodeMethod method = tableDecodePrimary[enc->ENCODING]; - return (this->*method)(mach_inst); - } // decode - - GPUStaticInst* - Decoder::subDecode_OP_VOPC(MachInst iFmt) - { - InFmt_VOPC *enc = &iFmt->iFmt_VOPC; - IsaDecodeMethod method = tableSubDecode_OP_VOPC[enc->OP]; - return (this->*method)(iFmt); - } // subDecode_OP_VOPC - - GPUStaticInst* - Decoder::subDecode_OP_VOP1(MachInst iFmt) - { - InFmt_VOP1 *enc = &iFmt->iFmt_VOP1; - IsaDecodeMethod method = tableSubDecode_OP_VOP1[enc->OP]; - return (this->*method)(iFmt); - } // subDecode_OP_VOP1 - - GPUStaticInst* - Decoder::subDecode_OP_SOP1(MachInst iFmt) - { - InFmt_SOP1 *enc = &iFmt->iFmt_SOP1; - 
IsaDecodeMethod method = tableSubDecode_OP_SOP1[enc->OP]; - return (this->*method)(iFmt); - } // subDecode_OP_SOP1 - - GPUStaticInst* - Decoder::subDecode_OP_SOPC(MachInst iFmt) - { - InFmt_SOPC *enc = &iFmt->iFmt_SOPC; - IsaDecodeMethod method = tableSubDecode_OP_SOPC[enc->OP]; - return (this->*method)(iFmt); - } // subDecode_OP_SOPC - - GPUStaticInst* - Decoder::subDecode_OP_SOPP(MachInst iFmt) - { - InFmt_SOPP *enc = &iFmt->iFmt_SOPP; - IsaDecodeMethod method = tableSubDecode_OP_SOPP[enc->OP]; - return (this->*method)(iFmt); - } // subDecode_OP_SOPP - - GPUStaticInst* - Decoder::subDecode_OP_SMEM(MachInst iFmt) - { - InFmt_SMEM *enc = &iFmt->iFmt_SMEM; - IsaDecodeMethod method = tableSubDecode_OP_SMEM[enc->OP]; - return (this->*method)(iFmt); - } // subDecode_OP_SMEM - - GPUStaticInst* - Decoder::subDecode_OPU_VOP3(MachInst iFmt) - { - InFmt_VOP3 *enc = &iFmt->iFmt_VOP3; - IsaDecodeMethod method = tableSubDecode_OPU_VOP3[enc->OP]; - return (this->*method)(iFmt); - } // subDecode_OPU_VOP3 - - GPUStaticInst* - Decoder::subDecode_OP_VINTRP(MachInst iFmt) - { - InFmt_VINTRP *enc = &iFmt->iFmt_VINTRP; - IsaDecodeMethod method = tableSubDecode_OP_VINTRP[enc->OP]; - return (this->*method)(iFmt); - } // subDecode_OP_VINTRP - - GPUStaticInst* - Decoder::subDecode_OP_DS(MachInst iFmt) - { - InFmt_DS *enc = &iFmt->iFmt_DS; - IsaDecodeMethod method = tableSubDecode_OP_DS[enc->OP]; - return (this->*method)(iFmt); - } // subDecode_OP_DS - - GPUStaticInst* - Decoder::subDecode_OP_FLAT(MachInst iFmt) - { - InFmt_FLAT *enc = &iFmt->iFmt_FLAT; - IsaDecodeMethod method = tableSubDecode_OP_FLAT[enc->OP]; - return (this->*method)(iFmt); - } // subDecode_OP_FLAT - - GPUStaticInst* - Decoder::subDecode_OP_MUBUF(MachInst iFmt) - { - InFmt_MUBUF *enc = &iFmt->iFmt_MUBUF; - IsaDecodeMethod method = tableSubDecode_OP_MUBUF[enc->OP]; - return (this->*method)(iFmt); - } // subDecode_OP_MUBUF - - GPUStaticInst* - Decoder::subDecode_OP_MTBUF(MachInst iFmt) - { - InFmt_MTBUF *enc = 
&iFmt->iFmt_MTBUF; - IsaDecodeMethod method = tableSubDecode_OP_MTBUF[enc->OP]; - return (this->*method)(iFmt); - } // subDecode_OP_MTBUF - - GPUStaticInst* - Decoder::subDecode_OP_MIMG(MachInst iFmt) - { - InFmt_MIMG *enc = &iFmt->iFmt_MIMG; - IsaDecodeMethod method = tableSubDecode_OP_MIMG[enc->OP]; - return (this->*method)(iFmt); - } // subDecode_OP_MIMG - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_CNDMASK_B32(MachInst iFmt) - { - return new Inst_VOP2__V_CNDMASK_B32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_CNDMASK_B32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_ADD_F32(MachInst iFmt) - { - return new Inst_VOP2__V_ADD_F32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_ADD_F32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_SUB_F32(MachInst iFmt) - { - return new Inst_VOP2__V_SUB_F32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_SUB_F32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_SUBREV_F32(MachInst iFmt) - { - return new Inst_VOP2__V_SUBREV_F32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_SUBREV_F32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MUL_LEGACY_F32(MachInst iFmt) - { - return new Inst_VOP2__V_MUL_LEGACY_F32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MUL_LEGACY_F32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MUL_F32(MachInst iFmt) - { - return new Inst_VOP2__V_MUL_F32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MUL_F32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MUL_I32_I24(MachInst iFmt) - { - return new Inst_VOP2__V_MUL_I32_I24(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MUL_I32_I24 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MUL_HI_I32_I24(MachInst iFmt) - { - return new Inst_VOP2__V_MUL_HI_I32_I24(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MUL_HI_I32_I24 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MUL_U32_U24(MachInst iFmt) - { - return new Inst_VOP2__V_MUL_U32_U24(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MUL_U32_U24 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MUL_HI_U32_U24(MachInst iFmt) - { - return new 
Inst_VOP2__V_MUL_HI_U32_U24(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MUL_HI_U32_U24 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MIN_F32(MachInst iFmt) - { - return new Inst_VOP2__V_MIN_F32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MIN_F32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MAX_F32(MachInst iFmt) - { - return new Inst_VOP2__V_MAX_F32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MAX_F32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MIN_I32(MachInst iFmt) - { - return new Inst_VOP2__V_MIN_I32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MIN_I32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MAX_I32(MachInst iFmt) - { - return new Inst_VOP2__V_MAX_I32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MAX_I32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MIN_U32(MachInst iFmt) - { - return new Inst_VOP2__V_MIN_U32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MIN_U32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MAX_U32(MachInst iFmt) - { - return new Inst_VOP2__V_MAX_U32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MAX_U32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_LSHRREV_B32(MachInst iFmt) - { - return new Inst_VOP2__V_LSHRREV_B32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_LSHRREV_B32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_ASHRREV_I32(MachInst iFmt) - { - return new Inst_VOP2__V_ASHRREV_I32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_ASHRREV_I32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_LSHLREV_B32(MachInst iFmt) - { - return new Inst_VOP2__V_LSHLREV_B32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_LSHLREV_B32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_AND_B32(MachInst iFmt) - { - return new Inst_VOP2__V_AND_B32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_AND_B32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_OR_B32(MachInst iFmt) - { - return new Inst_VOP2__V_OR_B32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_OR_B32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_XOR_B32(MachInst iFmt) - { - return new 
Inst_VOP2__V_XOR_B32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_XOR_B32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MAC_F32(MachInst iFmt) - { - return new Inst_VOP2__V_MAC_F32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MAC_F32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MADMK_F32(MachInst iFmt) - { - return new Inst_VOP2__V_MADMK_F32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MADMK_F32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MADAK_F32(MachInst iFmt) - { - return new Inst_VOP2__V_MADAK_F32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MADAK_F32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_ADD_U32(MachInst iFmt) - { - return new Inst_VOP2__V_ADD_U32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_ADD_U32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_SUB_U32(MachInst iFmt) - { - return new Inst_VOP2__V_SUB_U32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_SUB_U32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_SUBREV_U32(MachInst iFmt) - { - return new Inst_VOP2__V_SUBREV_U32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_SUBREV_U32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_ADDC_U32(MachInst iFmt) - { - return new Inst_VOP2__V_ADDC_U32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_ADDC_U32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_SUBB_U32(MachInst iFmt) - { - return new Inst_VOP2__V_SUBB_U32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_SUBB_U32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_SUBBREV_U32(MachInst iFmt) - { - return new Inst_VOP2__V_SUBBREV_U32(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_SUBBREV_U32 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_ADD_F16(MachInst iFmt) - { - return new Inst_VOP2__V_ADD_F16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_ADD_F16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_SUB_F16(MachInst iFmt) - { - return new Inst_VOP2__V_SUB_F16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_SUB_F16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_SUBREV_F16(MachInst iFmt) - { - return new 
Inst_VOP2__V_SUBREV_F16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_SUBREV_F16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MUL_F16(MachInst iFmt) - { - return new Inst_VOP2__V_MUL_F16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MUL_F16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MAC_F16(MachInst iFmt) - { - return new Inst_VOP2__V_MAC_F16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MAC_F16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MADMK_F16(MachInst iFmt) - { - return new Inst_VOP2__V_MADMK_F16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MADMK_F16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MADAK_F16(MachInst iFmt) - { - return new Inst_VOP2__V_MADAK_F16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MADAK_F16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_ADD_U16(MachInst iFmt) - { - return new Inst_VOP2__V_ADD_U16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_ADD_U16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_SUB_U16(MachInst iFmt) - { - return new Inst_VOP2__V_SUB_U16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_SUB_U16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_SUBREV_U16(MachInst iFmt) - { - return new Inst_VOP2__V_SUBREV_U16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_SUBREV_U16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MUL_LO_U16(MachInst iFmt) - { - return new Inst_VOP2__V_MUL_LO_U16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MUL_LO_U16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_LSHLREV_B16(MachInst iFmt) - { - return new Inst_VOP2__V_LSHLREV_B16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_LSHLREV_B16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_LSHRREV_B16(MachInst iFmt) - { - return new Inst_VOP2__V_LSHRREV_B16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_LSHRREV_B16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_ASHRREV_I16(MachInst iFmt) - { - return new Inst_VOP2__V_ASHRREV_I16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_ASHRREV_I16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MAX_F16(MachInst iFmt) - { - return 
new Inst_VOP2__V_MAX_F16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MAX_F16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MIN_F16(MachInst iFmt) - { - return new Inst_VOP2__V_MIN_F16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MIN_F16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MAX_U16(MachInst iFmt) - { - return new Inst_VOP2__V_MAX_U16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MAX_U16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MAX_I16(MachInst iFmt) - { - return new Inst_VOP2__V_MAX_I16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MAX_I16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MIN_U16(MachInst iFmt) - { - return new Inst_VOP2__V_MIN_U16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MIN_U16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_MIN_I16(MachInst iFmt) - { - return new Inst_VOP2__V_MIN_I16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_MIN_I16 - - GPUStaticInst* - Decoder::decode_OP_VOP2__V_LDEXP_F16(MachInst iFmt) - { - return new Inst_VOP2__V_LDEXP_F16(&iFmt->iFmt_VOP2); - } // decode_OP_VOP2__V_LDEXP_F16 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_ADD_U32(MachInst iFmt) - { - return new Inst_SOP2__S_ADD_U32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_ADD_U32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_SUB_U32(MachInst iFmt) - { - return new Inst_SOP2__S_SUB_U32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_SUB_U32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_ADD_I32(MachInst iFmt) - { - return new Inst_SOP2__S_ADD_I32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_ADD_I32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_SUB_I32(MachInst iFmt) - { - return new Inst_SOP2__S_SUB_I32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_SUB_I32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_ADDC_U32(MachInst iFmt) - { - return new Inst_SOP2__S_ADDC_U32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_ADDC_U32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_SUBB_U32(MachInst iFmt) - { - return new Inst_SOP2__S_SUBB_U32(&iFmt->iFmt_SOP2); - } // 
decode_OP_SOP2__S_SUBB_U32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_MIN_I32(MachInst iFmt) - { - return new Inst_SOP2__S_MIN_I32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_MIN_I32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_MIN_U32(MachInst iFmt) - { - return new Inst_SOP2__S_MIN_U32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_MIN_U32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_MAX_I32(MachInst iFmt) - { - return new Inst_SOP2__S_MAX_I32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_MAX_I32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_MAX_U32(MachInst iFmt) - { - return new Inst_SOP2__S_MAX_U32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_MAX_U32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_CSELECT_B32(MachInst iFmt) - { - return new Inst_SOP2__S_CSELECT_B32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_CSELECT_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_CSELECT_B64(MachInst iFmt) - { - return new Inst_SOP2__S_CSELECT_B64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_CSELECT_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_AND_B32(MachInst iFmt) - { - return new Inst_SOP2__S_AND_B32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_AND_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_AND_B64(MachInst iFmt) - { - return new Inst_SOP2__S_AND_B64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_AND_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_OR_B32(MachInst iFmt) - { - return new Inst_SOP2__S_OR_B32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_OR_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_OR_B64(MachInst iFmt) - { - return new Inst_SOP2__S_OR_B64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_OR_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_XOR_B32(MachInst iFmt) - { - return new Inst_SOP2__S_XOR_B32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_XOR_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_XOR_B64(MachInst iFmt) - { - return new Inst_SOP2__S_XOR_B64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_XOR_B64 - - GPUStaticInst* - 
Decoder::decode_OP_SOP2__S_ANDN2_B32(MachInst iFmt) - { - return new Inst_SOP2__S_ANDN2_B32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_ANDN2_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_ANDN2_B64(MachInst iFmt) - { - return new Inst_SOP2__S_ANDN2_B64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_ANDN2_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_ORN2_B32(MachInst iFmt) - { - return new Inst_SOP2__S_ORN2_B32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_ORN2_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_ORN2_B64(MachInst iFmt) - { - return new Inst_SOP2__S_ORN2_B64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_ORN2_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_NAND_B32(MachInst iFmt) - { - return new Inst_SOP2__S_NAND_B32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_NAND_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_NAND_B64(MachInst iFmt) - { - return new Inst_SOP2__S_NAND_B64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_NAND_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_NOR_B32(MachInst iFmt) - { - return new Inst_SOP2__S_NOR_B32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_NOR_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_NOR_B64(MachInst iFmt) - { - return new Inst_SOP2__S_NOR_B64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_NOR_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_XNOR_B32(MachInst iFmt) - { - return new Inst_SOP2__S_XNOR_B32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_XNOR_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_XNOR_B64(MachInst iFmt) - { - return new Inst_SOP2__S_XNOR_B64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_XNOR_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_LSHL_B32(MachInst iFmt) - { - return new Inst_SOP2__S_LSHL_B32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_LSHL_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_LSHL_B64(MachInst iFmt) - { - return new Inst_SOP2__S_LSHL_B64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_LSHL_B64 - - GPUStaticInst* - 
Decoder::decode_OP_SOP2__S_LSHR_B32(MachInst iFmt) - { - return new Inst_SOP2__S_LSHR_B32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_LSHR_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_LSHR_B64(MachInst iFmt) - { - return new Inst_SOP2__S_LSHR_B64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_LSHR_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_ASHR_I32(MachInst iFmt) - { - return new Inst_SOP2__S_ASHR_I32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_ASHR_I32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_ASHR_I64(MachInst iFmt) - { - return new Inst_SOP2__S_ASHR_I64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_ASHR_I64 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_BFM_B32(MachInst iFmt) - { - return new Inst_SOP2__S_BFM_B32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_BFM_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_BFM_B64(MachInst iFmt) - { - return new Inst_SOP2__S_BFM_B64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_BFM_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_MUL_I32(MachInst iFmt) - { - return new Inst_SOP2__S_MUL_I32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_MUL_I32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_BFE_U32(MachInst iFmt) - { - return new Inst_SOP2__S_BFE_U32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_BFE_U32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_BFE_I32(MachInst iFmt) - { - return new Inst_SOP2__S_BFE_I32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_BFE_I32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_BFE_U64(MachInst iFmt) - { - return new Inst_SOP2__S_BFE_U64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_BFE_U64 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_BFE_I64(MachInst iFmt) - { - return new Inst_SOP2__S_BFE_I64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_BFE_I64 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_CBRANCH_G_FORK(MachInst iFmt) - { - return new Inst_SOP2__S_CBRANCH_G_FORK(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_CBRANCH_G_FORK - - GPUStaticInst* - 
Decoder::decode_OP_SOP2__S_ABSDIFF_I32(MachInst iFmt) - { - return new Inst_SOP2__S_ABSDIFF_I32(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_ABSDIFF_I32 - - GPUStaticInst* - Decoder::decode_OP_SOP2__S_RFE_RESTORE_B64(MachInst iFmt) - { - return new Inst_SOP2__S_RFE_RESTORE_B64(&iFmt->iFmt_SOP2); - } // decode_OP_SOP2__S_RFE_RESTORE_B64 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_MOVK_I32(MachInst iFmt) - { - return new Inst_SOPK__S_MOVK_I32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_MOVK_I32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_CMOVK_I32(MachInst iFmt) - { - return new Inst_SOPK__S_CMOVK_I32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_CMOVK_I32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_CMPK_EQ_I32(MachInst iFmt) - { - return new Inst_SOPK__S_CMPK_EQ_I32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_CMPK_EQ_I32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_CMPK_LG_I32(MachInst iFmt) - { - return new Inst_SOPK__S_CMPK_LG_I32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_CMPK_LG_I32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_CMPK_GT_I32(MachInst iFmt) - { - return new Inst_SOPK__S_CMPK_GT_I32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_CMPK_GT_I32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_CMPK_GE_I32(MachInst iFmt) - { - return new Inst_SOPK__S_CMPK_GE_I32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_CMPK_GE_I32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_CMPK_LT_I32(MachInst iFmt) - { - return new Inst_SOPK__S_CMPK_LT_I32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_CMPK_LT_I32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_CMPK_LE_I32(MachInst iFmt) - { - return new Inst_SOPK__S_CMPK_LE_I32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_CMPK_LE_I32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_CMPK_EQ_U32(MachInst iFmt) - { - return new Inst_SOPK__S_CMPK_EQ_U32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_CMPK_EQ_U32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_CMPK_LG_U32(MachInst iFmt) - { - return new 
Inst_SOPK__S_CMPK_LG_U32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_CMPK_LG_U32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_CMPK_GT_U32(MachInst iFmt) - { - return new Inst_SOPK__S_CMPK_GT_U32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_CMPK_GT_U32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_CMPK_GE_U32(MachInst iFmt) - { - return new Inst_SOPK__S_CMPK_GE_U32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_CMPK_GE_U32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_CMPK_LT_U32(MachInst iFmt) - { - return new Inst_SOPK__S_CMPK_LT_U32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_CMPK_LT_U32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_CMPK_LE_U32(MachInst iFmt) - { - return new Inst_SOPK__S_CMPK_LE_U32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_CMPK_LE_U32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_ADDK_I32(MachInst iFmt) - { - return new Inst_SOPK__S_ADDK_I32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_ADDK_I32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_MULK_I32(MachInst iFmt) - { - return new Inst_SOPK__S_MULK_I32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_MULK_I32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_CBRANCH_I_FORK(MachInst iFmt) - { - return new Inst_SOPK__S_CBRANCH_I_FORK(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_CBRANCH_I_FORK - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_GETREG_B32(MachInst iFmt) - { - return new Inst_SOPK__S_GETREG_B32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_GETREG_B32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_SETREG_B32(MachInst iFmt) - { - return new Inst_SOPK__S_SETREG_B32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_SETREG_B32 - - GPUStaticInst* - Decoder::decode_OP_SOPK__S_SETREG_IMM32_B32(MachInst iFmt) - { - return new Inst_SOPK__S_SETREG_IMM32_B32(&iFmt->iFmt_SOPK); - } // decode_OP_SOPK__S_SETREG_IMM32_B32 - - GPUStaticInst* - Decoder::decode_OP_EXP(MachInst iFmt) - { - return new Inst_EXP__EXP(&iFmt->iFmt_EXP); - } // decode_OP_EXP - - GPUStaticInst* - 
Decoder::decode_OPU_VOP3__V_CMP_CLASS_F32(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_CLASS_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_CLASS_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMPX_CLASS_F32(MachInst iFmt) - { - return new Inst_VOP3__V_CMPX_CLASS_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMPX_CLASS_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_CLASS_F64(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_CLASS_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_CLASS_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMPX_CLASS_F64(MachInst iFmt) - { - return new Inst_VOP3__V_CMPX_CLASS_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMPX_CLASS_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_CLASS_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_CLASS_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_CLASS_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMPX_CLASS_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMPX_CLASS_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMPX_CLASS_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_F_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_F_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_F_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_LT_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_LT_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_LT_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_EQ_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_EQ_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_EQ_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_LE_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_LE_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_LE_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_GT_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_GT_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_GT_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_LG_F16(MachInst 
iFmt) - { - return new Inst_VOP3__V_CMP_LG_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_LG_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_GE_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_GE_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_GE_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_O_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_O_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_O_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_U_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_U_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_U_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_NGE_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_NGE_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_NGE_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_NLG_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_NLG_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_NLG_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_NGT_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_NGT_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_NGT_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_NLE_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_NLE_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_NLE_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_NEQ_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_NEQ_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_NEQ_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_NLT_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_NLT_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_NLT_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMP_TRU_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMP_TRU_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMP_TRU_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMPX_F_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMPX_F_F16(&iFmt->iFmt_VOP3); - } // 
decode_OPU_VOP3__V_CMPX_F_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMPX_LT_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMPX_LT_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMPX_LT_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMPX_EQ_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMPX_EQ_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMPX_EQ_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMPX_LE_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMPX_LE_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMPX_LE_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMPX_GT_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMPX_GT_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMPX_GT_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMPX_LG_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMPX_LG_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMPX_LG_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMPX_GE_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMPX_GE_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMPX_GE_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMPX_O_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMPX_O_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMPX_O_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMPX_U_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMPX_U_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMPX_U_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMPX_NGE_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMPX_NGE_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMPX_NGE_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMPX_NLG_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMPX_NLG_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMPX_NLG_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CMPX_NGT_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CMPX_NGT_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CMPX_NGT_F16 - - GPUStaticInst* - 
// VOP3-encoded floating-point compare decoders (F16 tail, all F32, all F64).
// Each stub is a trivial factory: it constructs the matching Inst_VOP3__*
// instruction object from the machine word's VOP3-format fields
// (iFmt->iFmt_VOP3) and returns it as a newly allocated GPUStaticInst;
// ownership of the returned object passes to the caller.
// NOTE(review): the leading return type of the first stub below falls just
// outside this chunk and is reconstructed from the pattern that every
// sibling stub in this file follows -- confirm against the full file.

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NLE_F16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NLE_F16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NLE_F16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NEQ_F16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NEQ_F16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NEQ_F16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NLT_F16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NLT_F16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NLT_F16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_TRU_F16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_TRU_F16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_TRU_F16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_F_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_F_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_F_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LT_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LT_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LT_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_EQ_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_EQ_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_EQ_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LE_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LE_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LE_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GT_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GT_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GT_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LG_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LG_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LG_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GE_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GE_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GE_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_O_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_O_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_O_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_U_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_U_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_U_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NGE_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NGE_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NGE_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NLG_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NLG_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NLG_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NGT_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NGT_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NGT_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NLE_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NLE_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NLE_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NEQ_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NEQ_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NEQ_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NLT_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NLT_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NLT_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_TRU_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_TRU_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_TRU_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_F_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_F_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_F_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LT_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LT_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LT_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_EQ_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_EQ_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_EQ_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LE_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LE_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LE_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GT_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GT_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GT_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LG_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LG_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LG_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GE_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GE_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GE_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_O_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_O_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_O_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_U_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_U_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_U_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NGE_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NGE_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NGE_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NLG_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NLG_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NLG_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NGT_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NGT_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NGT_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NLE_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NLE_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NLE_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NEQ_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NEQ_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NEQ_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NLT_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NLT_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NLT_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_TRU_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_TRU_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_TRU_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_F_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_F_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_F_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LT_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LT_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LT_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_EQ_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_EQ_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_EQ_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LE_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LE_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LE_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GT_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GT_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GT_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LG_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LG_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LG_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GE_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GE_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GE_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_O_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_O_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_O_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_U_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_U_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_U_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NGE_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NGE_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NGE_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NLG_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NLG_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NLG_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NGT_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NGT_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NGT_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NLE_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NLE_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NLE_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NEQ_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NEQ_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NEQ_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NLT_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NLT_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NLT_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_TRU_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_TRU_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_TRU_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_F_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_F_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_F_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LT_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LT_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LT_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_EQ_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_EQ_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_EQ_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LE_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LE_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LE_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GT_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GT_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GT_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LG_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LG_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LG_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GE_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GE_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GE_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_O_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_O_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_O_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_U_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_U_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_U_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NGE_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NGE_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NGE_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NLG_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NLG_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NLG_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NGT_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NGT_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NGT_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NLE_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NLE_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NLE_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NEQ_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NEQ_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NEQ_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NLT_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NLT_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NLT_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_TRU_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_TRU_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_TRU_F64
// VOP3-encoded integer compare decoders (I16/U16, I32/U32, I64 and the
// first CMPX U64 stubs). Same trivial-factory pattern as the float
// compares above: each stub allocates the matching Inst_VOP3__* from
// the VOP3 format fields (iFmt->iFmt_VOP3) and hands ownership of the
// new GPUStaticInst to the caller.

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_F_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_F_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_F_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LT_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LT_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LT_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_EQ_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_EQ_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_EQ_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LE_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LE_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LE_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GT_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GT_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GT_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NE_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NE_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NE_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GE_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GE_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GE_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_T_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_T_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_T_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_F_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_F_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_F_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LT_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LT_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LT_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_EQ_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_EQ_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_EQ_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LE_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LE_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LE_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GT_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GT_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GT_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NE_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NE_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NE_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GE_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GE_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GE_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_T_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_T_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_T_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_F_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_F_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_F_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LT_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LT_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LT_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_EQ_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_EQ_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_EQ_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LE_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LE_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LE_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GT_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GT_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GT_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NE_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NE_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NE_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GE_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GE_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GE_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_T_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_T_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_T_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_F_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_F_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_F_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LT_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LT_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LT_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_EQ_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_EQ_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_EQ_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LE_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LE_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LE_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GT_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GT_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GT_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NE_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NE_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NE_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GE_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GE_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GE_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_T_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_T_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_T_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_F_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_F_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_F_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LT_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LT_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LT_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_EQ_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_EQ_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_EQ_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LE_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LE_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LE_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GT_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GT_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GT_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NE_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NE_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NE_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GE_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GE_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GE_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_T_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_T_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_T_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_F_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_F_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_F_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LT_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LT_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LT_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_EQ_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_EQ_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_EQ_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LE_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LE_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LE_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GT_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GT_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GT_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NE_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NE_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NE_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GE_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GE_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GE_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_T_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_T_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_T_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_F_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_F_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_F_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LT_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LT_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LT_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_EQ_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_EQ_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_EQ_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LE_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LE_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LE_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GT_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GT_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GT_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NE_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NE_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NE_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GE_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GE_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GE_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_T_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_T_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_T_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_F_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_F_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_F_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LT_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LT_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LT_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_EQ_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_EQ_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_EQ_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LE_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LE_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LE_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GT_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GT_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GT_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NE_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NE_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NE_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GE_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GE_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GE_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_T_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_T_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_T_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_F_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_F_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_F_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LT_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LT_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LT_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_EQ_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_EQ_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_EQ_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LE_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LE_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LE_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GT_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GT_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GT_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NE_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NE_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NE_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GE_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GE_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GE_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_T_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_T_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_T_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_F_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_F_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_F_U64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LT_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LT_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LT_U64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_EQ_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_EQ_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_EQ_U64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_LE_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_LE_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_LE_U64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GT_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GT_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GT_U64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_NE_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_NE_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_NE_U64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_GE_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_GE_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_GE_U64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMP_T_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMP_T_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMP_T_U64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_F_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_F_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_F_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LT_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LT_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LT_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_EQ_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_EQ_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_EQ_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LE_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LE_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LE_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GT_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GT_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GT_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NE_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NE_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NE_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GE_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GE_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GE_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_T_I64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_T_I64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_T_I64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_F_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_F_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_F_U64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LT_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LT_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LT_U64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_EQ_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_EQ_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_EQ_U64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_LE_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_LE_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_LE_U64
// Remaining VOP3 decoders: the tail of the CMPX U64 compares, the VOP3
// arithmetic/logic/shift ops, and VOP1-style ops carried in the VOP3
// encoding (V_NOP, V_MOV_B32, the V_CVT_* conversions, rounding ops).
// Same factory pattern: build the matching Inst_VOP3__* from the VOP3
// fields and return it; caller owns the new GPUStaticInst. The six
// U32 add/sub/carry ops are the one exception -- they read the
// iFmt_VOP3_SDST_ENC view of the instruction word instead of iFmt_VOP3
// (see the per-function bodies below).

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GT_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GT_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GT_U64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_NE_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_NE_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_NE_U64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_GE_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_GE_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_GE_U64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CMPX_T_U64(MachInst iFmt)
{
    return new Inst_VOP3__V_CMPX_T_U64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CMPX_T_U64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CNDMASK_B32(MachInst iFmt)
{
    return new Inst_VOP3__V_CNDMASK_B32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CNDMASK_B32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_ADD_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_ADD_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_ADD_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUB_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_SUB_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_SUB_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUBREV_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_SUBREV_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_SUBREV_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MUL_LEGACY_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_MUL_LEGACY_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MUL_LEGACY_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MUL_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_MUL_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MUL_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MUL_I32_I24(MachInst iFmt)
{
    return new Inst_VOP3__V_MUL_I32_I24(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MUL_I32_I24

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MUL_HI_I32_I24(MachInst iFmt)
{
    return new Inst_VOP3__V_MUL_HI_I32_I24(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MUL_HI_I32_I24

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MUL_U32_U24(MachInst iFmt)
{
    return new Inst_VOP3__V_MUL_U32_U24(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MUL_U32_U24

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MUL_HI_U32_U24(MachInst iFmt)
{
    return new Inst_VOP3__V_MUL_HI_U32_U24(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MUL_HI_U32_U24

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MIN_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_MIN_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MIN_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MAX_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_MAX_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MAX_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MIN_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_MIN_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MIN_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MAX_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_MAX_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MAX_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MIN_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_MIN_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MIN_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MAX_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_MAX_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MAX_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_LSHRREV_B32(MachInst iFmt)
{
    return new Inst_VOP3__V_LSHRREV_B32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_LSHRREV_B32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_ASHRREV_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_ASHRREV_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_ASHRREV_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_LSHLREV_B32(MachInst iFmt)
{
    return new Inst_VOP3__V_LSHLREV_B32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_LSHLREV_B32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_AND_B32(MachInst iFmt)
{
    return new Inst_VOP3__V_AND_B32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_AND_B32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_OR_B32(MachInst iFmt)
{
    return new Inst_VOP3__V_OR_B32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_OR_B32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_XOR_B32(MachInst iFmt)
{
    return new Inst_VOP3__V_XOR_B32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_XOR_B32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MAC_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_MAC_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MAC_F32

// The following six decoders pass the iFmt_VOP3_SDST_ENC view of the
// instruction word rather than iFmt_VOP3 -- the only stubs in this
// group that do so.
GPUStaticInst*
Decoder::decode_OPU_VOP3__V_ADD_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_ADD_U32(&iFmt->iFmt_VOP3_SDST_ENC);
} // decode_OPU_VOP3__V_ADD_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUB_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_SUB_U32(&iFmt->iFmt_VOP3_SDST_ENC);
} // decode_OPU_VOP3__V_SUB_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUBREV_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_SUBREV_U32(&iFmt->iFmt_VOP3_SDST_ENC);
} // decode_OPU_VOP3__V_SUBREV_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_ADDC_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_ADDC_U32(&iFmt->iFmt_VOP3_SDST_ENC);
} // decode_OPU_VOP3__V_ADDC_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUBB_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_SUBB_U32(&iFmt->iFmt_VOP3_SDST_ENC);
} // decode_OPU_VOP3__V_SUBB_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUBBREV_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_SUBBREV_U32(&iFmt->iFmt_VOP3_SDST_ENC);
} // decode_OPU_VOP3__V_SUBBREV_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_ADD_F16(MachInst iFmt)
{
    return new Inst_VOP3__V_ADD_F16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_ADD_F16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUB_F16(MachInst iFmt)
{
    return new Inst_VOP3__V_SUB_F16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_SUB_F16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUBREV_F16(MachInst iFmt)
{
    return new Inst_VOP3__V_SUBREV_F16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_SUBREV_F16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MUL_F16(MachInst iFmt)
{
    return new Inst_VOP3__V_MUL_F16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MUL_F16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MAC_F16(MachInst iFmt)
{
    return new Inst_VOP3__V_MAC_F16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MAC_F16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_ADD_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_ADD_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_ADD_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUB_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_SUB_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_SUB_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_SUBREV_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_SUBREV_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_SUBREV_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MUL_LO_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_MUL_LO_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MUL_LO_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_LSHLREV_B16(MachInst iFmt)
{
    return new Inst_VOP3__V_LSHLREV_B16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_LSHLREV_B16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_LSHRREV_B16(MachInst iFmt)
{
    return new Inst_VOP3__V_LSHRREV_B16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_LSHRREV_B16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_ASHRREV_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_ASHRREV_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_ASHRREV_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MAX_F16(MachInst iFmt)
{
    return new Inst_VOP3__V_MAX_F16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MAX_F16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MIN_F16(MachInst iFmt)
{
    return new Inst_VOP3__V_MIN_F16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MIN_F16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MAX_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_MAX_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MAX_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MAX_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_MAX_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MAX_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MIN_U16(MachInst iFmt)
{
    return new Inst_VOP3__V_MIN_U16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MIN_U16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MIN_I16(MachInst iFmt)
{
    return new Inst_VOP3__V_MIN_I16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MIN_I16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_LDEXP_F16(MachInst iFmt)
{
    return new Inst_VOP3__V_LDEXP_F16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_LDEXP_F16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_NOP(MachInst iFmt)
{
    return new Inst_VOP3__V_NOP(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_NOP

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MOV_B32(MachInst iFmt)
{
    return new Inst_VOP3__V_MOV_B32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MOV_B32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_I32_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_I32_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_I32_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_F64_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_F64_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_F64_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_F32_I32(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_F32_I32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_F32_I32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_F32_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_F32_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_F32_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_U32_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_U32_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_U32_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_I32_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_I32_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_I32_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_MOV_FED_B32(MachInst iFmt)
{
    return new Inst_VOP3__V_MOV_FED_B32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_MOV_FED_B32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_F16_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_F16_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_F16_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_F32_F16(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_F32_F16(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_F32_F16

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_RPI_I32_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_RPI_I32_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_RPI_I32_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_FLR_I32_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_FLR_I32_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_FLR_I32_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_OFF_F32_I4(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_OFF_F32_I4(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_OFF_F32_I4

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_F32_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_F32_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_F32_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_F64_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_F64_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_F64_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_F32_UBYTE0(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_F32_UBYTE0(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_F32_UBYTE0

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_F32_UBYTE1(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_F32_UBYTE1(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_F32_UBYTE1

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_F32_UBYTE2(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_F32_UBYTE2(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_F32_UBYTE2

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_F32_UBYTE3(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_F32_UBYTE3(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_F32_UBYTE3

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_U32_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_U32_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_U32_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CVT_F64_U32(MachInst iFmt)
{
    return new Inst_VOP3__V_CVT_F64_U32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CVT_F64_U32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_TRUNC_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_TRUNC_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_TRUNC_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CEIL_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_CEIL_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CEIL_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_RNDNE_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_RNDNE_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_RNDNE_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_FLOOR_F64(MachInst iFmt)
{
    return new Inst_VOP3__V_FLOOR_F64(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_FLOOR_F64

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_FRACT_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_FRACT_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_FRACT_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_TRUNC_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_TRUNC_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_TRUNC_F32

GPUStaticInst*
Decoder::decode_OPU_VOP3__V_CEIL_F32(MachInst iFmt)
{
    return new Inst_VOP3__V_CEIL_F32(&iFmt->iFmt_VOP3);
} // decode_OPU_VOP3__V_CEIL_F32
GPUStaticInst* - Decoder::decode_OPU_VOP3__V_RNDNE_F32(MachInst iFmt) - { - return new Inst_VOP3__V_RNDNE_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_RNDNE_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_FLOOR_F32(MachInst iFmt) - { - return new Inst_VOP3__V_FLOOR_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FLOOR_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_EXP_F32(MachInst iFmt) - { - return new Inst_VOP3__V_EXP_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_EXP_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_LOG_F32(MachInst iFmt) - { - return new Inst_VOP3__V_LOG_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_LOG_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_RCP_F32(MachInst iFmt) - { - return new Inst_VOP3__V_RCP_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_RCP_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_RCP_IFLAG_F32(MachInst iFmt) - { - return new Inst_VOP3__V_RCP_IFLAG_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_RCP_IFLAG_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_RSQ_F32(MachInst iFmt) - { - return new Inst_VOP3__V_RSQ_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_RSQ_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_RCP_F64(MachInst iFmt) - { - return new Inst_VOP3__V_RCP_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_RCP_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_RSQ_F64(MachInst iFmt) - { - return new Inst_VOP3__V_RSQ_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_RSQ_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_SQRT_F32(MachInst iFmt) - { - return new Inst_VOP3__V_SQRT_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_SQRT_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_SQRT_F64(MachInst iFmt) - { - return new Inst_VOP3__V_SQRT_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_SQRT_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_SIN_F32(MachInst iFmt) - { - return new Inst_VOP3__V_SIN_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_SIN_F32 - - 
GPUStaticInst* - Decoder::decode_OPU_VOP3__V_COS_F32(MachInst iFmt) - { - return new Inst_VOP3__V_COS_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_COS_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_NOT_B32(MachInst iFmt) - { - return new Inst_VOP3__V_NOT_B32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_NOT_B32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_BFREV_B32(MachInst iFmt) - { - return new Inst_VOP3__V_BFREV_B32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_BFREV_B32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_FFBH_U32(MachInst iFmt) - { - return new Inst_VOP3__V_FFBH_U32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FFBH_U32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_FFBL_B32(MachInst iFmt) - { - return new Inst_VOP3__V_FFBL_B32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FFBL_B32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_FFBH_I32(MachInst iFmt) - { - return new Inst_VOP3__V_FFBH_I32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FFBH_I32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_FREXP_EXP_I32_F64(MachInst iFmt) - { - return new Inst_VOP3__V_FREXP_EXP_I32_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FREXP_EXP_I32_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_FREXP_MANT_F64(MachInst iFmt) - { - return new Inst_VOP3__V_FREXP_MANT_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FREXP_MANT_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_FRACT_F64(MachInst iFmt) - { - return new Inst_VOP3__V_FRACT_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FRACT_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_FREXP_EXP_I32_F32(MachInst iFmt) - { - return new Inst_VOP3__V_FREXP_EXP_I32_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FREXP_EXP_I32_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_FREXP_MANT_F32(MachInst iFmt) - { - return new Inst_VOP3__V_FREXP_MANT_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FREXP_MANT_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CLREXCP(MachInst iFmt) - { - return 
new Inst_VOP3__V_CLREXCP(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CLREXCP - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CVT_F16_U16(MachInst iFmt) - { - return new Inst_VOP3__V_CVT_F16_U16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CVT_F16_U16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CVT_F16_I16(MachInst iFmt) - { - return new Inst_VOP3__V_CVT_F16_I16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CVT_F16_I16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CVT_U16_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CVT_U16_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CVT_U16_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CVT_I16_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CVT_I16_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CVT_I16_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_RCP_F16(MachInst iFmt) - { - return new Inst_VOP3__V_RCP_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_RCP_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_SQRT_F16(MachInst iFmt) - { - return new Inst_VOP3__V_SQRT_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_SQRT_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_RSQ_F16(MachInst iFmt) - { - return new Inst_VOP3__V_RSQ_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_RSQ_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_LOG_F16(MachInst iFmt) - { - return new Inst_VOP3__V_LOG_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_LOG_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_EXP_F16(MachInst iFmt) - { - return new Inst_VOP3__V_EXP_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_EXP_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_FREXP_MANT_F16(MachInst iFmt) - { - return new Inst_VOP3__V_FREXP_MANT_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FREXP_MANT_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_FREXP_EXP_I16_F16(MachInst iFmt) - { - return new Inst_VOP3__V_FREXP_EXP_I16_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FREXP_EXP_I16_F16 - - GPUStaticInst* - 
Decoder::decode_OPU_VOP3__V_FLOOR_F16(MachInst iFmt) - { - return new Inst_VOP3__V_FLOOR_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FLOOR_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CEIL_F16(MachInst iFmt) - { - return new Inst_VOP3__V_CEIL_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CEIL_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_TRUNC_F16(MachInst iFmt) - { - return new Inst_VOP3__V_TRUNC_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_TRUNC_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_RNDNE_F16(MachInst iFmt) - { - return new Inst_VOP3__V_RNDNE_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_RNDNE_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_FRACT_F16(MachInst iFmt) - { - return new Inst_VOP3__V_FRACT_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FRACT_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_SIN_F16(MachInst iFmt) - { - return new Inst_VOP3__V_SIN_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_SIN_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_COS_F16(MachInst iFmt) - { - return new Inst_VOP3__V_COS_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_COS_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_EXP_LEGACY_F32(MachInst iFmt) - { - return new Inst_VOP3__V_EXP_LEGACY_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_EXP_LEGACY_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_LOG_LEGACY_F32(MachInst iFmt) - { - return new Inst_VOP3__V_LOG_LEGACY_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_LOG_LEGACY_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MAD_LEGACY_F32(MachInst iFmt) - { - return new Inst_VOP3__V_MAD_LEGACY_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MAD_LEGACY_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MAD_F32(MachInst iFmt) - { - return new Inst_VOP3__V_MAD_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MAD_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MAD_I32_I24(MachInst iFmt) - { - return new Inst_VOP3__V_MAD_I32_I24(&iFmt->iFmt_VOP3); 
- } // decode_OPU_VOP3__V_MAD_I32_I24 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MAD_U32_U24(MachInst iFmt) - { - return new Inst_VOP3__V_MAD_U32_U24(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MAD_U32_U24 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CUBEID_F32(MachInst iFmt) - { - return new Inst_VOP3__V_CUBEID_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CUBEID_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CUBESC_F32(MachInst iFmt) - { - return new Inst_VOP3__V_CUBESC_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CUBESC_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CUBETC_F32(MachInst iFmt) - { - return new Inst_VOP3__V_CUBETC_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CUBETC_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CUBEMA_F32(MachInst iFmt) - { - return new Inst_VOP3__V_CUBEMA_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CUBEMA_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_BFE_U32(MachInst iFmt) - { - return new Inst_VOP3__V_BFE_U32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_BFE_U32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_BFE_I32(MachInst iFmt) - { - return new Inst_VOP3__V_BFE_I32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_BFE_I32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_BFI_B32(MachInst iFmt) - { - return new Inst_VOP3__V_BFI_B32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_BFI_B32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_FMA_F32(MachInst iFmt) - { - return new Inst_VOP3__V_FMA_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FMA_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_FMA_F64(MachInst iFmt) - { - return new Inst_VOP3__V_FMA_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FMA_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_LERP_U8(MachInst iFmt) - { - return new Inst_VOP3__V_LERP_U8(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_LERP_U8 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_ALIGNBIT_B32(MachInst iFmt) - { - return new 
Inst_VOP3__V_ALIGNBIT_B32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_ALIGNBIT_B32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_ALIGNBYTE_B32(MachInst iFmt) - { - return new Inst_VOP3__V_ALIGNBYTE_B32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_ALIGNBYTE_B32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MIN3_F32(MachInst iFmt) - { - return new Inst_VOP3__V_MIN3_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MIN3_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MIN3_I32(MachInst iFmt) - { - return new Inst_VOP3__V_MIN3_I32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MIN3_I32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MIN3_U32(MachInst iFmt) - { - return new Inst_VOP3__V_MIN3_U32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MIN3_U32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MAX3_F32(MachInst iFmt) - { - return new Inst_VOP3__V_MAX3_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MAX3_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MAX3_I32(MachInst iFmt) - { - return new Inst_VOP3__V_MAX3_I32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MAX3_I32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MAX3_U32(MachInst iFmt) - { - return new Inst_VOP3__V_MAX3_U32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MAX3_U32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MED3_F32(MachInst iFmt) - { - return new Inst_VOP3__V_MED3_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MED3_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MED3_I32(MachInst iFmt) - { - return new Inst_VOP3__V_MED3_I32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MED3_I32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MED3_U32(MachInst iFmt) - { - return new Inst_VOP3__V_MED3_U32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MED3_U32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_SAD_U8(MachInst iFmt) - { - return new Inst_VOP3__V_SAD_U8(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_SAD_U8 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_SAD_HI_U8(MachInst iFmt) - { - 
return new Inst_VOP3__V_SAD_HI_U8(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_SAD_HI_U8 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_SAD_U16(MachInst iFmt) - { - return new Inst_VOP3__V_SAD_U16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_SAD_U16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_SAD_U32(MachInst iFmt) - { - return new Inst_VOP3__V_SAD_U32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_SAD_U32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CVT_PK_U8_F32(MachInst iFmt) - { - return new Inst_VOP3__V_CVT_PK_U8_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CVT_PK_U8_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_DIV_FIXUP_F32(MachInst iFmt) - { - return new Inst_VOP3__V_DIV_FIXUP_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_DIV_FIXUP_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_DIV_FIXUP_F64(MachInst iFmt) - { - return new Inst_VOP3__V_DIV_FIXUP_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_DIV_FIXUP_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_DIV_SCALE_F32(MachInst iFmt) - { - return new Inst_VOP3__V_DIV_SCALE_F32(&iFmt->iFmt_VOP3_SDST_ENC); - } // decode_OPU_VOP3__V_DIV_SCALE_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_DIV_SCALE_F64(MachInst iFmt) - { - return new Inst_VOP3__V_DIV_SCALE_F64(&iFmt->iFmt_VOP3_SDST_ENC); - } // decode_OPU_VOP3__V_DIV_SCALE_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_DIV_FMAS_F32(MachInst iFmt) - { - return new Inst_VOP3__V_DIV_FMAS_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_DIV_FMAS_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_DIV_FMAS_F64(MachInst iFmt) - { - return new Inst_VOP3__V_DIV_FMAS_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_DIV_FMAS_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MSAD_U8(MachInst iFmt) - { - return new Inst_VOP3__V_MSAD_U8(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MSAD_U8 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_QSAD_PK_U16_U8(MachInst iFmt) - { - return new Inst_VOP3__V_QSAD_PK_U16_U8(&iFmt->iFmt_VOP3); 
- } // decode_OPU_VOP3__V_QSAD_PK_U16_U8 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MQSAD_PK_U16_U8(MachInst iFmt) - { - return new Inst_VOP3__V_MQSAD_PK_U16_U8(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MQSAD_PK_U16_U8 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MQSAD_U32_U8(MachInst iFmt) - { - return new Inst_VOP3__V_MQSAD_U32_U8(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MQSAD_U32_U8 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MAD_U64_U32(MachInst iFmt) - { - return new Inst_VOP3__V_MAD_U64_U32(&iFmt->iFmt_VOP3_SDST_ENC); - } // decode_OPU_VOP3__V_MAD_U64_U32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MAD_I64_I32(MachInst iFmt) - { - return new Inst_VOP3__V_MAD_I64_I32(&iFmt->iFmt_VOP3_SDST_ENC); - } // decode_OPU_VOP3__V_MAD_I64_I32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MAD_F16(MachInst iFmt) - { - return new Inst_VOP3__V_MAD_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MAD_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MAD_U16(MachInst iFmt) - { - return new Inst_VOP3__V_MAD_U16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MAD_U16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MAD_I16(MachInst iFmt) - { - return new Inst_VOP3__V_MAD_I16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MAD_I16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_PERM_B32(MachInst iFmt) - { - return new Inst_VOP3__V_PERM_B32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_PERM_B32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_FMA_F16(MachInst iFmt) - { - return new Inst_VOP3__V_FMA_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_FMA_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_DIV_FIXUP_F16(MachInst iFmt) - { - return new Inst_VOP3__V_DIV_FIXUP_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_DIV_FIXUP_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CVT_PKACCUM_U8_F32(MachInst iFmt) - { - return new Inst_VOP3__V_CVT_PKACCUM_U8_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CVT_PKACCUM_U8_F32 - - GPUStaticInst* - 
Decoder::decode_OPU_VOP3__V_INTERP_P1_F32(MachInst iFmt) - { - return new Inst_VOP3__V_INTERP_P1_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_INTERP_P1_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_INTERP_P2_F32(MachInst iFmt) - { - return new Inst_VOP3__V_INTERP_P2_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_INTERP_P2_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_INTERP_MOV_F32(MachInst iFmt) - { - return new Inst_VOP3__V_INTERP_MOV_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_INTERP_MOV_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_INTERP_P1LL_F16(MachInst iFmt) - { - return new Inst_VOP3__V_INTERP_P1LL_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_INTERP_P1LL_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_INTERP_P1LV_F16(MachInst iFmt) - { - return new Inst_VOP3__V_INTERP_P1LV_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_INTERP_P1LV_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_INTERP_P2_F16(MachInst iFmt) - { - return new Inst_VOP3__V_INTERP_P2_F16(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_INTERP_P2_F16 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_ADD_F64(MachInst iFmt) - { - return new Inst_VOP3__V_ADD_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_ADD_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MUL_F64(MachInst iFmt) - { - return new Inst_VOP3__V_MUL_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MUL_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MIN_F64(MachInst iFmt) - { - return new Inst_VOP3__V_MIN_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MIN_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MAX_F64(MachInst iFmt) - { - return new Inst_VOP3__V_MAX_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MAX_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_LDEXP_F64(MachInst iFmt) - { - return new Inst_VOP3__V_LDEXP_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_LDEXP_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MUL_LO_U32(MachInst iFmt) - { - return new 
Inst_VOP3__V_MUL_LO_U32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MUL_LO_U32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MUL_HI_U32(MachInst iFmt) - { - return new Inst_VOP3__V_MUL_HI_U32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MUL_HI_U32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MUL_HI_I32(MachInst iFmt) - { - return new Inst_VOP3__V_MUL_HI_I32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MUL_HI_I32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_LDEXP_F32(MachInst iFmt) - { - return new Inst_VOP3__V_LDEXP_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_LDEXP_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_READLANE_B32(MachInst iFmt) - { - return new Inst_VOP3__V_READLANE_B32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_READLANE_B32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_WRITELANE_B32(MachInst iFmt) - { - return new Inst_VOP3__V_WRITELANE_B32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_WRITELANE_B32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_BCNT_U32_B32(MachInst iFmt) - { - return new Inst_VOP3__V_BCNT_U32_B32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_BCNT_U32_B32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MBCNT_LO_U32_B32(MachInst iFmt) - { - return new Inst_VOP3__V_MBCNT_LO_U32_B32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MBCNT_LO_U32_B32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_MBCNT_HI_U32_B32(MachInst iFmt) - { - return new Inst_VOP3__V_MBCNT_HI_U32_B32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_MBCNT_HI_U32_B32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_LSHLREV_B64(MachInst iFmt) - { - return new Inst_VOP3__V_LSHLREV_B64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_LSHLREV_B64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_LSHRREV_B64(MachInst iFmt) - { - return new Inst_VOP3__V_LSHRREV_B64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_LSHRREV_B64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_ASHRREV_I64(MachInst iFmt) - { - return new Inst_VOP3__V_ASHRREV_I64(&iFmt->iFmt_VOP3); - 
} // decode_OPU_VOP3__V_ASHRREV_I64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_TRIG_PREOP_F64(MachInst iFmt) - { - return new Inst_VOP3__V_TRIG_PREOP_F64(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_TRIG_PREOP_F64 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_BFM_B32(MachInst iFmt) - { - return new Inst_VOP3__V_BFM_B32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_BFM_B32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CVT_PKNORM_I16_F32(MachInst iFmt) - { - return new Inst_VOP3__V_CVT_PKNORM_I16_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CVT_PKNORM_I16_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CVT_PKNORM_U16_F32(MachInst iFmt) - { - return new Inst_VOP3__V_CVT_PKNORM_U16_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CVT_PKNORM_U16_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CVT_PKRTZ_F16_F32(MachInst iFmt) - { - return new Inst_VOP3__V_CVT_PKRTZ_F16_F32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CVT_PKRTZ_F16_F32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CVT_PK_U16_U32(MachInst iFmt) - { - return new Inst_VOP3__V_CVT_PK_U16_U32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CVT_PK_U16_U32 - - GPUStaticInst* - Decoder::decode_OPU_VOP3__V_CVT_PK_I16_I32(MachInst iFmt) - { - return new Inst_VOP3__V_CVT_PK_I16_I32(&iFmt->iFmt_VOP3); - } // decode_OPU_VOP3__V_CVT_PK_I16_I32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_ADD_U32(MachInst iFmt) - { - return new Inst_DS__DS_ADD_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_ADD_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_SUB_U32(MachInst iFmt) - { - return new Inst_DS__DS_SUB_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_SUB_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_RSUB_U32(MachInst iFmt) - { - return new Inst_DS__DS_RSUB_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_RSUB_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_INC_U32(MachInst iFmt) - { - return new Inst_DS__DS_INC_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_INC_U32 - - GPUStaticInst* - 
Decoder::decode_OP_DS__DS_DEC_U32(MachInst iFmt) - { - return new Inst_DS__DS_DEC_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_DEC_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_I32(MachInst iFmt) - { - return new Inst_DS__DS_MIN_I32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_I32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_I32(MachInst iFmt) - { - return new Inst_DS__DS_MAX_I32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_I32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_U32(MachInst iFmt) - { - return new Inst_DS__DS_MIN_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_U32(MachInst iFmt) - { - return new Inst_DS__DS_MAX_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_AND_B32(MachInst iFmt) - { - return new Inst_DS__DS_AND_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_AND_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_OR_B32(MachInst iFmt) - { - return new Inst_DS__DS_OR_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_OR_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_XOR_B32(MachInst iFmt) - { - return new Inst_DS__DS_XOR_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_XOR_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MSKOR_B32(MachInst iFmt) - { - return new Inst_DS__DS_MSKOR_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MSKOR_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRITE_B32(MachInst iFmt) - { - return new Inst_DS__DS_WRITE_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRITE_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRITE2_B32(MachInst iFmt) - { - return new Inst_DS__DS_WRITE2_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRITE2_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRITE2ST64_B32(MachInst iFmt) - { - return new Inst_DS__DS_WRITE2ST64_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRITE2ST64_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_CMPST_B32(MachInst iFmt) - { - return new 
Inst_DS__DS_CMPST_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_CMPST_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_CMPST_F32(MachInst iFmt) - { - return new Inst_DS__DS_CMPST_F32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_CMPST_F32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_F32(MachInst iFmt) - { - return new Inst_DS__DS_MIN_F32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_F32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_F32(MachInst iFmt) - { - return new Inst_DS__DS_MAX_F32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_F32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_NOP(MachInst iFmt) - { - return new Inst_DS__DS_NOP(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_NOP - - GPUStaticInst* - Decoder::decode_OP_DS__DS_ADD_F32(MachInst iFmt) - { - return new Inst_DS__DS_ADD_F32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_ADD_F32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRITE_B8(MachInst iFmt) - { - return new Inst_DS__DS_WRITE_B8(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRITE_B8 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRITE_B16(MachInst iFmt) - { - return new Inst_DS__DS_WRITE_B16(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRITE_B16 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_ADD_RTN_U32(MachInst iFmt) - { - return new Inst_DS__DS_ADD_RTN_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_ADD_RTN_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_SUB_RTN_U32(MachInst iFmt) - { - return new Inst_DS__DS_SUB_RTN_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_SUB_RTN_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_RSUB_RTN_U32(MachInst iFmt) - { - return new Inst_DS__DS_RSUB_RTN_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_RSUB_RTN_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_INC_RTN_U32(MachInst iFmt) - { - return new Inst_DS__DS_INC_RTN_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_INC_RTN_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_DEC_RTN_U32(MachInst iFmt) - { - return new Inst_DS__DS_DEC_RTN_U32(&iFmt->iFmt_DS); - } // 
decode_OP_DS__DS_DEC_RTN_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_RTN_I32(MachInst iFmt) - { - return new Inst_DS__DS_MIN_RTN_I32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_RTN_I32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_RTN_I32(MachInst iFmt) - { - return new Inst_DS__DS_MAX_RTN_I32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_RTN_I32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_RTN_U32(MachInst iFmt) - { - return new Inst_DS__DS_MIN_RTN_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_RTN_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_RTN_U32(MachInst iFmt) - { - return new Inst_DS__DS_MAX_RTN_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_RTN_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_AND_RTN_B32(MachInst iFmt) - { - return new Inst_DS__DS_AND_RTN_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_AND_RTN_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_OR_RTN_B32(MachInst iFmt) - { - return new Inst_DS__DS_OR_RTN_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_OR_RTN_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_XOR_RTN_B32(MachInst iFmt) - { - return new Inst_DS__DS_XOR_RTN_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_XOR_RTN_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MSKOR_RTN_B32(MachInst iFmt) - { - return new Inst_DS__DS_MSKOR_RTN_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MSKOR_RTN_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRXCHG_RTN_B32(MachInst iFmt) - { - return new Inst_DS__DS_WRXCHG_RTN_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRXCHG_RTN_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRXCHG2_RTN_B32(MachInst iFmt) - { - return new Inst_DS__DS_WRXCHG2_RTN_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRXCHG2_RTN_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRXCHG2ST64_RTN_B32(MachInst iFmt) - { - return new Inst_DS__DS_WRXCHG2ST64_RTN_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRXCHG2ST64_RTN_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_CMPST_RTN_B32(MachInst 
iFmt) - { - return new Inst_DS__DS_CMPST_RTN_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_CMPST_RTN_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_CMPST_RTN_F32(MachInst iFmt) - { - return new Inst_DS__DS_CMPST_RTN_F32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_CMPST_RTN_F32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_RTN_F32(MachInst iFmt) - { - return new Inst_DS__DS_MIN_RTN_F32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_RTN_F32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_RTN_F32(MachInst iFmt) - { - return new Inst_DS__DS_MAX_RTN_F32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_RTN_F32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRAP_RTN_B32(MachInst iFmt) - { - return new Inst_DS__DS_WRAP_RTN_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRAP_RTN_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_ADD_RTN_F32(MachInst iFmt) - { - return new Inst_DS__DS_ADD_RTN_F32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_ADD_RTN_F32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_READ_B32(MachInst iFmt) - { - return new Inst_DS__DS_READ_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_READ_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_READ2_B32(MachInst iFmt) - { - return new Inst_DS__DS_READ2_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_READ2_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_READ2ST64_B32(MachInst iFmt) - { - return new Inst_DS__DS_READ2ST64_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_READ2ST64_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_READ_I8(MachInst iFmt) - { - return new Inst_DS__DS_READ_I8(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_READ_I8 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_READ_U8(MachInst iFmt) - { - return new Inst_DS__DS_READ_U8(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_READ_U8 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_READ_I16(MachInst iFmt) - { - return new Inst_DS__DS_READ_I16(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_READ_I16 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_READ_U16(MachInst iFmt) - { - 
return new Inst_DS__DS_READ_U16(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_READ_U16 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_SWIZZLE_B32(MachInst iFmt) - { - return new Inst_DS__DS_SWIZZLE_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_SWIZZLE_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_PERMUTE_B32(MachInst iFmt) - { - return new Inst_DS__DS_PERMUTE_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_PERMUTE_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_BPERMUTE_B32(MachInst iFmt) - { - return new Inst_DS__DS_BPERMUTE_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_BPERMUTE_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_ADD_U64(MachInst iFmt) - { - return new Inst_DS__DS_ADD_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_ADD_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_SUB_U64(MachInst iFmt) - { - return new Inst_DS__DS_SUB_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_SUB_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_RSUB_U64(MachInst iFmt) - { - return new Inst_DS__DS_RSUB_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_RSUB_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_INC_U64(MachInst iFmt) - { - return new Inst_DS__DS_INC_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_INC_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_DEC_U64(MachInst iFmt) - { - return new Inst_DS__DS_DEC_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_DEC_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_I64(MachInst iFmt) - { - return new Inst_DS__DS_MIN_I64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_I64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_I64(MachInst iFmt) - { - return new Inst_DS__DS_MAX_I64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_I64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_U64(MachInst iFmt) - { - return new Inst_DS__DS_MIN_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_U64(MachInst iFmt) - { - return new Inst_DS__DS_MAX_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_U64 - - 
GPUStaticInst* - Decoder::decode_OP_DS__DS_AND_B64(MachInst iFmt) - { - return new Inst_DS__DS_AND_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_AND_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_OR_B64(MachInst iFmt) - { - return new Inst_DS__DS_OR_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_OR_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_XOR_B64(MachInst iFmt) - { - return new Inst_DS__DS_XOR_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_XOR_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MSKOR_B64(MachInst iFmt) - { - return new Inst_DS__DS_MSKOR_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MSKOR_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRITE_B64(MachInst iFmt) - { - return new Inst_DS__DS_WRITE_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRITE_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRITE2_B64(MachInst iFmt) - { - return new Inst_DS__DS_WRITE2_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRITE2_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRITE2ST64_B64(MachInst iFmt) - { - return new Inst_DS__DS_WRITE2ST64_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRITE2ST64_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_CMPST_B64(MachInst iFmt) - { - return new Inst_DS__DS_CMPST_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_CMPST_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_CMPST_F64(MachInst iFmt) - { - return new Inst_DS__DS_CMPST_F64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_CMPST_F64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_F64(MachInst iFmt) - { - return new Inst_DS__DS_MIN_F64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_F64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_F64(MachInst iFmt) - { - return new Inst_DS__DS_MAX_F64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_F64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_ADD_RTN_U64(MachInst iFmt) - { - return new Inst_DS__DS_ADD_RTN_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_ADD_RTN_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_SUB_RTN_U64(MachInst 
iFmt) - { - return new Inst_DS__DS_SUB_RTN_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_SUB_RTN_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_RSUB_RTN_U64(MachInst iFmt) - { - return new Inst_DS__DS_RSUB_RTN_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_RSUB_RTN_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_INC_RTN_U64(MachInst iFmt) - { - return new Inst_DS__DS_INC_RTN_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_INC_RTN_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_DEC_RTN_U64(MachInst iFmt) - { - return new Inst_DS__DS_DEC_RTN_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_DEC_RTN_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_RTN_I64(MachInst iFmt) - { - return new Inst_DS__DS_MIN_RTN_I64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_RTN_I64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_RTN_I64(MachInst iFmt) - { - return new Inst_DS__DS_MAX_RTN_I64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_RTN_I64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_RTN_U64(MachInst iFmt) - { - return new Inst_DS__DS_MIN_RTN_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_RTN_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_RTN_U64(MachInst iFmt) - { - return new Inst_DS__DS_MAX_RTN_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_RTN_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_AND_RTN_B64(MachInst iFmt) - { - return new Inst_DS__DS_AND_RTN_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_AND_RTN_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_OR_RTN_B64(MachInst iFmt) - { - return new Inst_DS__DS_OR_RTN_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_OR_RTN_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_XOR_RTN_B64(MachInst iFmt) - { - return new Inst_DS__DS_XOR_RTN_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_XOR_RTN_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MSKOR_RTN_B64(MachInst iFmt) - { - return new Inst_DS__DS_MSKOR_RTN_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MSKOR_RTN_B64 - - GPUStaticInst* - 
Decoder::decode_OP_DS__DS_WRXCHG_RTN_B64(MachInst iFmt) - { - return new Inst_DS__DS_WRXCHG_RTN_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRXCHG_RTN_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRXCHG2_RTN_B64(MachInst iFmt) - { - return new Inst_DS__DS_WRXCHG2_RTN_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRXCHG2_RTN_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRXCHG2ST64_RTN_B64(MachInst iFmt) - { - return new Inst_DS__DS_WRXCHG2ST64_RTN_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRXCHG2ST64_RTN_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_CMPST_RTN_B64(MachInst iFmt) - { - return new Inst_DS__DS_CMPST_RTN_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_CMPST_RTN_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_CMPST_RTN_F64(MachInst iFmt) - { - return new Inst_DS__DS_CMPST_RTN_F64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_CMPST_RTN_F64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_RTN_F64(MachInst iFmt) - { - return new Inst_DS__DS_MIN_RTN_F64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_RTN_F64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_RTN_F64(MachInst iFmt) - { - return new Inst_DS__DS_MAX_RTN_F64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_RTN_F64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_READ_B64(MachInst iFmt) - { - return new Inst_DS__DS_READ_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_READ_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_READ2_B64(MachInst iFmt) - { - return new Inst_DS__DS_READ2_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_READ2_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_READ2ST64_B64(MachInst iFmt) - { - return new Inst_DS__DS_READ2ST64_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_READ2ST64_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_CONDXCHG32_RTN_B64(MachInst iFmt) - { - return new Inst_DS__DS_CONDXCHG32_RTN_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_CONDXCHG32_RTN_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_ADD_SRC2_U32(MachInst iFmt) - { - return new 
Inst_DS__DS_ADD_SRC2_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_ADD_SRC2_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_SUB_SRC2_U32(MachInst iFmt) - { - return new Inst_DS__DS_SUB_SRC2_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_SUB_SRC2_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_RSUB_SRC2_U32(MachInst iFmt) - { - return new Inst_DS__DS_RSUB_SRC2_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_RSUB_SRC2_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_INC_SRC2_U32(MachInst iFmt) - { - return new Inst_DS__DS_INC_SRC2_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_INC_SRC2_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_DEC_SRC2_U32(MachInst iFmt) - { - return new Inst_DS__DS_DEC_SRC2_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_DEC_SRC2_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_SRC2_I32(MachInst iFmt) - { - return new Inst_DS__DS_MIN_SRC2_I32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_SRC2_I32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_SRC2_I32(MachInst iFmt) - { - return new Inst_DS__DS_MAX_SRC2_I32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_SRC2_I32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_SRC2_U32(MachInst iFmt) - { - return new Inst_DS__DS_MIN_SRC2_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_SRC2_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_SRC2_U32(MachInst iFmt) - { - return new Inst_DS__DS_MAX_SRC2_U32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_SRC2_U32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_AND_SRC2_B32(MachInst iFmt) - { - return new Inst_DS__DS_AND_SRC2_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_AND_SRC2_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_OR_SRC2_B32(MachInst iFmt) - { - return new Inst_DS__DS_OR_SRC2_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_OR_SRC2_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_XOR_SRC2_B32(MachInst iFmt) - { - return new Inst_DS__DS_XOR_SRC2_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_XOR_SRC2_B32 - - GPUStaticInst* - 
Decoder::decode_OP_DS__DS_WRITE_SRC2_B32(MachInst iFmt) - { - return new Inst_DS__DS_WRITE_SRC2_B32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRITE_SRC2_B32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_SRC2_F32(MachInst iFmt) - { - return new Inst_DS__DS_MIN_SRC2_F32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_SRC2_F32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_SRC2_F32(MachInst iFmt) - { - return new Inst_DS__DS_MAX_SRC2_F32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_SRC2_F32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_ADD_SRC2_F32(MachInst iFmt) - { - return new Inst_DS__DS_ADD_SRC2_F32(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_ADD_SRC2_F32 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_GWS_SEMA_RELEASE_ALL(MachInst iFmt) - { - return new Inst_DS__DS_GWS_SEMA_RELEASE_ALL(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_GWS_SEMA_RELEASE_ALL - - GPUStaticInst* - Decoder::decode_OP_DS__DS_GWS_INIT(MachInst iFmt) - { - return new Inst_DS__DS_GWS_INIT(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_GWS_INIT - - GPUStaticInst* - Decoder::decode_OP_DS__DS_GWS_SEMA_V(MachInst iFmt) - { - return new Inst_DS__DS_GWS_SEMA_V(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_GWS_SEMA_V - - GPUStaticInst* - Decoder::decode_OP_DS__DS_GWS_SEMA_BR(MachInst iFmt) - { - return new Inst_DS__DS_GWS_SEMA_BR(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_GWS_SEMA_BR - - GPUStaticInst* - Decoder::decode_OP_DS__DS_GWS_SEMA_P(MachInst iFmt) - { - return new Inst_DS__DS_GWS_SEMA_P(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_GWS_SEMA_P - - GPUStaticInst* - Decoder::decode_OP_DS__DS_GWS_BARRIER(MachInst iFmt) - { - return new Inst_DS__DS_GWS_BARRIER(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_GWS_BARRIER - - GPUStaticInst* - Decoder::decode_OP_DS__DS_CONSUME(MachInst iFmt) - { - return new Inst_DS__DS_CONSUME(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_CONSUME - - GPUStaticInst* - Decoder::decode_OP_DS__DS_APPEND(MachInst iFmt) - { - return new Inst_DS__DS_APPEND(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_APPEND - 
- GPUStaticInst* - Decoder::decode_OP_DS__DS_ORDERED_COUNT(MachInst iFmt) - { - return new Inst_DS__DS_ORDERED_COUNT(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_ORDERED_COUNT - - GPUStaticInst* - Decoder::decode_OP_DS__DS_ADD_SRC2_U64(MachInst iFmt) - { - return new Inst_DS__DS_ADD_SRC2_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_ADD_SRC2_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_SUB_SRC2_U64(MachInst iFmt) - { - return new Inst_DS__DS_SUB_SRC2_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_SUB_SRC2_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_RSUB_SRC2_U64(MachInst iFmt) - { - return new Inst_DS__DS_RSUB_SRC2_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_RSUB_SRC2_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_INC_SRC2_U64(MachInst iFmt) - { - return new Inst_DS__DS_INC_SRC2_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_INC_SRC2_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_DEC_SRC2_U64(MachInst iFmt) - { - return new Inst_DS__DS_DEC_SRC2_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_DEC_SRC2_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_SRC2_I64(MachInst iFmt) - { - return new Inst_DS__DS_MIN_SRC2_I64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_SRC2_I64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_SRC2_I64(MachInst iFmt) - { - return new Inst_DS__DS_MAX_SRC2_I64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_SRC2_I64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_SRC2_U64(MachInst iFmt) - { - return new Inst_DS__DS_MIN_SRC2_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_SRC2_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_SRC2_U64(MachInst iFmt) - { - return new Inst_DS__DS_MAX_SRC2_U64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_SRC2_U64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_AND_SRC2_B64(MachInst iFmt) - { - return new Inst_DS__DS_AND_SRC2_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_AND_SRC2_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_OR_SRC2_B64(MachInst iFmt) - { - return new 
Inst_DS__DS_OR_SRC2_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_OR_SRC2_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_XOR_SRC2_B64(MachInst iFmt) - { - return new Inst_DS__DS_XOR_SRC2_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_XOR_SRC2_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRITE_SRC2_B64(MachInst iFmt) - { - return new Inst_DS__DS_WRITE_SRC2_B64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRITE_SRC2_B64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MIN_SRC2_F64(MachInst iFmt) - { - return new Inst_DS__DS_MIN_SRC2_F64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MIN_SRC2_F64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_MAX_SRC2_F64(MachInst iFmt) - { - return new Inst_DS__DS_MAX_SRC2_F64(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_MAX_SRC2_F64 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRITE_B96(MachInst iFmt) - { - return new Inst_DS__DS_WRITE_B96(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRITE_B96 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_WRITE_B128(MachInst iFmt) - { - return new Inst_DS__DS_WRITE_B128(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_WRITE_B128 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_READ_B96(MachInst iFmt) - { - return new Inst_DS__DS_READ_B96(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_READ_B96 - - GPUStaticInst* - Decoder::decode_OP_DS__DS_READ_B128(MachInst iFmt) - { - return new Inst_DS__DS_READ_B128(&iFmt->iFmt_DS); - } // decode_OP_DS__DS_READ_B128 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_LOAD_UBYTE(MachInst iFmt) - { - return new Inst_FLAT__FLAT_LOAD_UBYTE(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_LOAD_UBYTE - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_LOAD_SBYTE(MachInst iFmt) - { - return new Inst_FLAT__FLAT_LOAD_SBYTE(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_LOAD_SBYTE - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_LOAD_USHORT(MachInst iFmt) - { - return new Inst_FLAT__FLAT_LOAD_USHORT(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_LOAD_USHORT - - GPUStaticInst* - 
Decoder::decode_OP_FLAT__FLAT_LOAD_SSHORT(MachInst iFmt) - { - return new Inst_FLAT__FLAT_LOAD_SSHORT(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_LOAD_SSHORT - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_LOAD_DWORD(MachInst iFmt) - { - return new Inst_FLAT__FLAT_LOAD_DWORD(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_LOAD_DWORD - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_LOAD_DWORDX2(MachInst iFmt) - { - return new Inst_FLAT__FLAT_LOAD_DWORDX2(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_LOAD_DWORDX2 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_LOAD_DWORDX3(MachInst iFmt) - { - return new Inst_FLAT__FLAT_LOAD_DWORDX3(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_LOAD_DWORDX3 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_LOAD_DWORDX4(MachInst iFmt) - { - return new Inst_FLAT__FLAT_LOAD_DWORDX4(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_LOAD_DWORDX4 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_STORE_BYTE(MachInst iFmt) - { - return new Inst_FLAT__FLAT_STORE_BYTE(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_STORE_BYTE - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_STORE_SHORT(MachInst iFmt) - { - return new Inst_FLAT__FLAT_STORE_SHORT(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_STORE_SHORT - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_STORE_DWORD(MachInst iFmt) - { - return new Inst_FLAT__FLAT_STORE_DWORD(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_STORE_DWORD - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_STORE_DWORDX2(MachInst iFmt) - { - return new Inst_FLAT__FLAT_STORE_DWORDX2(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_STORE_DWORDX2 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_STORE_DWORDX3(MachInst iFmt) - { - return new Inst_FLAT__FLAT_STORE_DWORDX3(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_STORE_DWORDX3 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_STORE_DWORDX4(MachInst iFmt) - { - return new Inst_FLAT__FLAT_STORE_DWORDX4(&iFmt->iFmt_FLAT); - } // 
decode_OP_FLAT__FLAT_STORE_DWORDX4 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_SWAP(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_SWAP(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_SWAP - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_CMPSWAP(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_CMPSWAP(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_CMPSWAP - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_ADD(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_ADD(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_ADD - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_SUB(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_SUB(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_SUB - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_SMIN(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_SMIN(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_SMIN - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_UMIN(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_UMIN(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_UMIN - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_SMAX(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_SMAX(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_SMAX - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_UMAX(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_UMAX(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_UMAX - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_AND(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_AND(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_AND - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_OR(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_OR(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_OR - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_XOR(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_XOR(&iFmt->iFmt_FLAT); - } // 
decode_OP_FLAT__FLAT_ATOMIC_XOR - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_INC(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_INC(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_INC - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_DEC(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_DEC(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_DEC - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_SWAP_X2(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_SWAP_X2(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_SWAP_X2 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_CMPSWAP_X2(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_CMPSWAP_X2 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_ADD_X2(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_ADD_X2(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_ADD_X2 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_SUB_X2(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_SUB_X2(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_SUB_X2 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_SMIN_X2(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_SMIN_X2(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_SMIN_X2 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_UMIN_X2(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_UMIN_X2(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_UMIN_X2 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_SMAX_X2(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_SMAX_X2(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_SMAX_X2 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_UMAX_X2(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_UMAX_X2(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_UMAX_X2 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_AND_X2(MachInst 
iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_AND_X2(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_AND_X2 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_OR_X2(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_OR_X2(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_OR_X2 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_XOR_X2(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_XOR_X2(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_XOR_X2 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_INC_X2(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_INC_X2(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_INC_X2 - - GPUStaticInst* - Decoder::decode_OP_FLAT__FLAT_ATOMIC_DEC_X2(MachInst iFmt) - { - return new Inst_FLAT__FLAT_ATOMIC_DEC_X2(&iFmt->iFmt_FLAT); - } // decode_OP_FLAT__FLAT_ATOMIC_DEC_X2 - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_LOAD(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_LOAD(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_LOAD - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_LOAD_MIP(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_LOAD_MIP(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_LOAD_MIP - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_LOAD_PCK(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_LOAD_PCK(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_LOAD_PCK - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_LOAD_PCK_SGN(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_LOAD_PCK_SGN(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_LOAD_PCK_SGN - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_LOAD_MIP_PCK(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_LOAD_MIP_PCK(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_LOAD_MIP_PCK - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_LOAD_MIP_PCK_SGN(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_LOAD_MIP_PCK_SGN - - GPUStaticInst* - 
Decoder::decode_OP_MIMG__IMAGE_STORE(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_STORE(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_STORE - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_STORE_MIP(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_STORE_MIP(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_STORE_MIP - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_STORE_PCK(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_STORE_PCK(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_STORE_PCK - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_STORE_MIP_PCK(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_STORE_MIP_PCK(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_STORE_MIP_PCK - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GET_RESINFO(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GET_RESINFO(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GET_RESINFO - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_ATOMIC_SWAP(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_ATOMIC_SWAP(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_ATOMIC_SWAP - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_ATOMIC_CMPSWAP(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_ATOMIC_CMPSWAP(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_ATOMIC_CMPSWAP - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_ATOMIC_ADD(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_ATOMIC_ADD(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_ATOMIC_ADD - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_ATOMIC_SUB(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_ATOMIC_SUB(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_ATOMIC_SUB - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_ATOMIC_SMIN(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_ATOMIC_SMIN(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_ATOMIC_SMIN - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_ATOMIC_UMIN(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_ATOMIC_UMIN(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_ATOMIC_UMIN - - 
GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_ATOMIC_SMAX(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_ATOMIC_SMAX(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_ATOMIC_SMAX - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_ATOMIC_UMAX(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_ATOMIC_UMAX(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_ATOMIC_UMAX - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_ATOMIC_AND(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_ATOMIC_AND(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_ATOMIC_AND - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_ATOMIC_OR(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_ATOMIC_OR(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_ATOMIC_OR - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_ATOMIC_XOR(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_ATOMIC_XOR(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_ATOMIC_XOR - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_ATOMIC_INC(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_ATOMIC_INC(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_ATOMIC_INC - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_ATOMIC_DEC(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_ATOMIC_DEC(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_ATOMIC_DEC - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_CL(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_CL(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_CL - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_D(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_D(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_D - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_D_CL(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_D_CL(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_D_CL - - 
GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_L(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_L(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_L - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_B(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_B(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_B - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_B_CL(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_B_CL(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_B_CL - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_LZ(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_LZ(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_LZ - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_CL(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_CL(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_CL - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_D(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_D(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_D - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_D_CL(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_D_CL(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_D_CL - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_L(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_L(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_L - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_B(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_B(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_B - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_B_CL(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_B_CL(&iFmt->iFmt_MIMG); - } // 
decode_OP_MIMG__IMAGE_SAMPLE_C_B_CL - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_LZ(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_LZ(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_LZ - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_CL_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_CL_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_CL_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_D_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_D_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_D_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_D_CL_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_D_CL_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_D_CL_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_L_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_L_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_L_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_B_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_B_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_B_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_B_CL_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_B_CL_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_B_CL_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_LZ_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_LZ_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_LZ_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_CL_O(MachInst iFmt) - { - return new 
Inst_MIMG__IMAGE_SAMPLE_C_CL_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_CL_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_D_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_D_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_D_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_D_CL_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_D_CL_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_L_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_L_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_L_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_B_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_B_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_B_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_B_CL_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_B_CL_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_LZ_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_LZ_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_LZ_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4 - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_CL(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_CL(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_CL - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_L(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_L(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_L - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_B(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_B(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_B - - GPUStaticInst* - 
Decoder::decode_OP_MIMG__IMAGE_GATHER4_B_CL(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_B_CL(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_B_CL - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_LZ(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_LZ(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_LZ - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_C(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_C(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_C - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_CL(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_C_CL(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_C_CL - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_L(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_C_L(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_C_L - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_B(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_C_B(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_C_B - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_B_CL(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_C_B_CL(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_C_B_CL - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_LZ(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_C_LZ(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_C_LZ - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_CL_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_CL_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_CL_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_L_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_L_O(&iFmt->iFmt_MIMG); - } // 
decode_OP_MIMG__IMAGE_GATHER4_L_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_B_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_B_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_B_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_B_CL_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_B_CL_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_B_CL_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_LZ_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_LZ_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_LZ_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_C_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_C_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_CL_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_C_CL_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_C_CL_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_L_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_C_L_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_C_L_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_B_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_C_B_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_C_B_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_B_CL_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_C_B_CL_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_C_B_CL_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GATHER4_C_LZ_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GATHER4_C_LZ_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GATHER4_C_LZ_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_GET_LOD(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_GET_LOD(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_GET_LOD - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_CD(MachInst 
iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_CD(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_CD - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_CD_CL(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_CD_CL(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_CD_CL - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_CD(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_CD(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_CD - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_CD_CL(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_CD_CL(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_CD_CL - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_CD_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_CD_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_CD_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_CD_CL_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_CD_CL_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_CD_CL_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_CD_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_CD_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_CD_O - - GPUStaticInst* - Decoder::decode_OP_MIMG__IMAGE_SAMPLE_C_CD_CL_O(MachInst iFmt) - { - return new Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O(&iFmt->iFmt_MIMG); - } // decode_OP_MIMG__IMAGE_SAMPLE_C_CD_CL_O - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_X(MachInst iFmt) - { - return new Inst_MTBUF__TBUFFER_LOAD_FORMAT_X(&iFmt->iFmt_MTBUF); - } // decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_X - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_XY(MachInst iFmt) - { - return new Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY(&iFmt->iFmt_MTBUF); - } // decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_XY - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(MachInst iFmt) - { - return new 
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(&iFmt->iFmt_MTBUF); - } // decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_XYZ - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(MachInst iFmt) - { - return new Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(&iFmt->iFmt_MTBUF); - } // decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_XYZW - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_X(MachInst iFmt) - { - return new Inst_MTBUF__TBUFFER_STORE_FORMAT_X(&iFmt->iFmt_MTBUF); - } // decode_OP_MTBUF__TBUFFER_STORE_FORMAT_X - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_XY(MachInst iFmt) - { - return new Inst_MTBUF__TBUFFER_STORE_FORMAT_XY(&iFmt->iFmt_MTBUF); - } // decode_OP_MTBUF__TBUFFER_STORE_FORMAT_XY - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_XYZ(MachInst iFmt) - { - return new Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ(&iFmt->iFmt_MTBUF); - } // decode_OP_MTBUF__TBUFFER_STORE_FORMAT_XYZ - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_XYZW(MachInst iFmt) - { - return new Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW(&iFmt->iFmt_MTBUF); - } // decode_OP_MTBUF__TBUFFER_STORE_FORMAT_XYZW - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(MachInst iFmt) - { - return new Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(&iFmt->iFmt_MTBUF); - } // decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_X - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(MachInst iFmt) - { - return new Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(&iFmt->iFmt_MTBUF); - } // decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ(MachInst iFmt) - { - return new Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ(&iFmt->iFmt_MTBUF); - } // decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW(MachInst iFmt) - { - return new Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW(&iFmt->iFmt_MTBUF); - } // 
decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_X(MachInst iFmt) - { - return new Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X(&iFmt->iFmt_MTBUF); - } // decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_X - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(MachInst iFmt) - { - return new Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(&iFmt->iFmt_MTBUF); - } // decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_XY - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(MachInst iFmt) - { - return new Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(&iFmt->iFmt_MTBUF); - } // decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ - - GPUStaticInst* - Decoder::decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(MachInst iFmt) - { - return new - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(&iFmt->iFmt_MTBUF); - } // decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_X(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_FORMAT_X(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_FORMAT_X - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_XY(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_FORMAT_XY - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_XYZ(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_FORMAT_XYZ - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_XYZW(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_FORMAT_XYZW - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_X(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_STORE_FORMAT_X(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_STORE_FORMAT_X - - GPUStaticInst* - 
Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_XY(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_STORE_FORMAT_XY(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_STORE_FORMAT_XY - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_XYZ(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_STORE_FORMAT_XYZ - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_XYZW(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_STORE_FORMAT_XYZW - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_X(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_X - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_XY - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_X(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_X - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_XY(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_XY - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(MachInst iFmt) - { - return new 
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_UBYTE(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_UBYTE(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_UBYTE - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_SBYTE(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_SBYTE(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_SBYTE - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_USHORT(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_USHORT(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_USHORT - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_SSHORT(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_SSHORT(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_SSHORT - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_DWORD(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_DWORD(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_DWORD - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_DWORDX2(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_DWORDX2(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_DWORDX2 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_DWORDX3(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_DWORDX3(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_DWORDX3 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_LOAD_DWORDX4(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_LOAD_DWORDX4(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_LOAD_DWORDX4 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_STORE_BYTE(MachInst iFmt) - { - return new 
Inst_MUBUF__BUFFER_STORE_BYTE(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_STORE_BYTE - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_STORE_SHORT(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_STORE_SHORT(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_STORE_SHORT - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_STORE_DWORD(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_STORE_DWORD(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_STORE_DWORD - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_STORE_DWORDX2(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_STORE_DWORDX2(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_STORE_DWORDX2 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_STORE_DWORDX3(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_STORE_DWORDX3(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_STORE_DWORDX3 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_STORE_DWORDX4(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_STORE_DWORDX4(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_STORE_DWORDX4 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_STORE_LDS_DWORD(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_STORE_LDS_DWORD(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_STORE_LDS_DWORD - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_WBINVL1(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_WBINVL1(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_WBINVL1 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_WBINVL1_VOL(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_WBINVL1_VOL(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_WBINVL1_VOL - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SWAP(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_SWAP(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_SWAP - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_CMPSWAP(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(&iFmt->iFmt_MUBUF); - } // 
decode_OP_MUBUF__BUFFER_ATOMIC_CMPSWAP - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_ADD(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_ADD(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_ADD - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SUB(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_SUB(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_SUB - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SMIN(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_SMIN(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_SMIN - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_UMIN(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_UMIN(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_UMIN - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SMAX(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_SMAX(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_SMAX - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_UMAX(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_UMAX(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_UMAX - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_AND(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_AND(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_AND - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_OR(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_OR(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_OR - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_XOR(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_XOR(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_XOR - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_INC(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_INC(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_INC - - GPUStaticInst* - 
Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_DEC(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_DEC(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_DEC - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SWAP_X2(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_SWAP_X2 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_ADD_X2(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_ADD_X2 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SUB_X2(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_SUB_X2 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SMIN_X2(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_SMIN_X2 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_UMIN_X2(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_UMIN_X2 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_SMAX_X2(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_SMAX_X2 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_UMAX_X2(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_UMAX_X2 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_AND_X2(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_AND_X2(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_AND_X2 - - GPUStaticInst* - 
Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_OR_X2(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_OR_X2(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_OR_X2 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_XOR_X2(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_XOR_X2 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_INC_X2(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_INC_X2(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_INC_X2 - - GPUStaticInst* - Decoder::decode_OP_MUBUF__BUFFER_ATOMIC_DEC_X2(MachInst iFmt) - { - return new Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(&iFmt->iFmt_MUBUF); - } // decode_OP_MUBUF__BUFFER_ATOMIC_DEC_X2 - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_LOAD_DWORD(MachInst iFmt) - { - return new Inst_SMEM__S_LOAD_DWORD(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_LOAD_DWORD - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_LOAD_DWORDX2(MachInst iFmt) - { - return new Inst_SMEM__S_LOAD_DWORDX2(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_LOAD_DWORDX2 - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_LOAD_DWORDX4(MachInst iFmt) - { - return new Inst_SMEM__S_LOAD_DWORDX4(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_LOAD_DWORDX4 - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_LOAD_DWORDX8(MachInst iFmt) - { - return new Inst_SMEM__S_LOAD_DWORDX8(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_LOAD_DWORDX8 - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_LOAD_DWORDX16(MachInst iFmt) - { - return new Inst_SMEM__S_LOAD_DWORDX16(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_LOAD_DWORDX16 - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_BUFFER_LOAD_DWORD(MachInst iFmt) - { - return new Inst_SMEM__S_BUFFER_LOAD_DWORD(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_BUFFER_LOAD_DWORD - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_BUFFER_LOAD_DWORDX2(MachInst iFmt) - { - return new Inst_SMEM__S_BUFFER_LOAD_DWORDX2(&iFmt->iFmt_SMEM); - } // 
decode_OP_SMEM__S_BUFFER_LOAD_DWORDX2 - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_BUFFER_LOAD_DWORDX4(MachInst iFmt) - { - return new Inst_SMEM__S_BUFFER_LOAD_DWORDX4(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_BUFFER_LOAD_DWORDX4 - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_BUFFER_LOAD_DWORDX8(MachInst iFmt) - { - return new Inst_SMEM__S_BUFFER_LOAD_DWORDX8(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_BUFFER_LOAD_DWORDX8 - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_BUFFER_LOAD_DWORDX16(MachInst iFmt) - { - return new Inst_SMEM__S_BUFFER_LOAD_DWORDX16(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_BUFFER_LOAD_DWORDX16 - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_STORE_DWORD(MachInst iFmt) - { - return new Inst_SMEM__S_STORE_DWORD(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_STORE_DWORD - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_STORE_DWORDX2(MachInst iFmt) - { - return new Inst_SMEM__S_STORE_DWORDX2(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_STORE_DWORDX2 - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_STORE_DWORDX4(MachInst iFmt) - { - return new Inst_SMEM__S_STORE_DWORDX4(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_STORE_DWORDX4 - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_BUFFER_STORE_DWORD(MachInst iFmt) - { - return new Inst_SMEM__S_BUFFER_STORE_DWORD(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_BUFFER_STORE_DWORD - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_BUFFER_STORE_DWORDX2(MachInst iFmt) - { - return new Inst_SMEM__S_BUFFER_STORE_DWORDX2(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_BUFFER_STORE_DWORDX2 - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_BUFFER_STORE_DWORDX4(MachInst iFmt) - { - return new Inst_SMEM__S_BUFFER_STORE_DWORDX4(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_BUFFER_STORE_DWORDX4 - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_DCACHE_INV(MachInst iFmt) - { - return new Inst_SMEM__S_DCACHE_INV(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_DCACHE_INV - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_DCACHE_WB(MachInst iFmt) 
- { - return new Inst_SMEM__S_DCACHE_WB(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_DCACHE_WB - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_DCACHE_INV_VOL(MachInst iFmt) - { - return new Inst_SMEM__S_DCACHE_INV_VOL(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_DCACHE_INV_VOL - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_DCACHE_WB_VOL(MachInst iFmt) - { - return new Inst_SMEM__S_DCACHE_WB_VOL(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_DCACHE_WB_VOL - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_MEMTIME(MachInst iFmt) - { - return new Inst_SMEM__S_MEMTIME(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_MEMTIME - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_MEMREALTIME(MachInst iFmt) - { - return new Inst_SMEM__S_MEMREALTIME(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_MEMREALTIME - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_ATC_PROBE(MachInst iFmt) - { - return new Inst_SMEM__S_ATC_PROBE(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_ATC_PROBE - - GPUStaticInst* - Decoder::decode_OP_SMEM__S_ATC_PROBE_BUFFER(MachInst iFmt) - { - return new Inst_SMEM__S_ATC_PROBE_BUFFER(&iFmt->iFmt_SMEM); - } // decode_OP_SMEM__S_ATC_PROBE_BUFFER - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_MOV_B32(MachInst iFmt) - { - return new Inst_SOP1__S_MOV_B32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_MOV_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_MOV_B64(MachInst iFmt) - { - return new Inst_SOP1__S_MOV_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_MOV_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_CMOV_B32(MachInst iFmt) - { - return new Inst_SOP1__S_CMOV_B32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_CMOV_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_CMOV_B64(MachInst iFmt) - { - return new Inst_SOP1__S_CMOV_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_CMOV_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_NOT_B32(MachInst iFmt) - { - return new Inst_SOP1__S_NOT_B32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_NOT_B32 - - GPUStaticInst* - 
Decoder::decode_OP_SOP1__S_NOT_B64(MachInst iFmt) - { - return new Inst_SOP1__S_NOT_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_NOT_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_WQM_B32(MachInst iFmt) - { - return new Inst_SOP1__S_WQM_B32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_WQM_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_WQM_B64(MachInst iFmt) - { - return new Inst_SOP1__S_WQM_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_WQM_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_BREV_B32(MachInst iFmt) - { - return new Inst_SOP1__S_BREV_B32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_BREV_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_BREV_B64(MachInst iFmt) - { - return new Inst_SOP1__S_BREV_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_BREV_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_BCNT0_I32_B32(MachInst iFmt) - { - return new Inst_SOP1__S_BCNT0_I32_B32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_BCNT0_I32_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_BCNT0_I32_B64(MachInst iFmt) - { - return new Inst_SOP1__S_BCNT0_I32_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_BCNT0_I32_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_BCNT1_I32_B32(MachInst iFmt) - { - return new Inst_SOP1__S_BCNT1_I32_B32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_BCNT1_I32_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_BCNT1_I32_B64(MachInst iFmt) - { - return new Inst_SOP1__S_BCNT1_I32_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_BCNT1_I32_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_FF0_I32_B32(MachInst iFmt) - { - return new Inst_SOP1__S_FF0_I32_B32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_FF0_I32_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_FF0_I32_B64(MachInst iFmt) - { - return new Inst_SOP1__S_FF0_I32_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_FF0_I32_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_FF1_I32_B32(MachInst iFmt) - { - return new Inst_SOP1__S_FF1_I32_B32(&iFmt->iFmt_SOP1); - } // 
decode_OP_SOP1__S_FF1_I32_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_FF1_I32_B64(MachInst iFmt) - { - return new Inst_SOP1__S_FF1_I32_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_FF1_I32_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_FLBIT_I32_B32(MachInst iFmt) - { - return new Inst_SOP1__S_FLBIT_I32_B32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_FLBIT_I32_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_FLBIT_I32_B64(MachInst iFmt) - { - return new Inst_SOP1__S_FLBIT_I32_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_FLBIT_I32_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_FLBIT_I32(MachInst iFmt) - { - return new Inst_SOP1__S_FLBIT_I32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_FLBIT_I32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_FLBIT_I32_I64(MachInst iFmt) - { - return new Inst_SOP1__S_FLBIT_I32_I64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_FLBIT_I32_I64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_SEXT_I32_I8(MachInst iFmt) - { - return new Inst_SOP1__S_SEXT_I32_I8(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_SEXT_I32_I8 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_SEXT_I32_I16(MachInst iFmt) - { - return new Inst_SOP1__S_SEXT_I32_I16(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_SEXT_I32_I16 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_BITSET0_B32(MachInst iFmt) - { - return new Inst_SOP1__S_BITSET0_B32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_BITSET0_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_BITSET0_B64(MachInst iFmt) - { - return new Inst_SOP1__S_BITSET0_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_BITSET0_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_BITSET1_B32(MachInst iFmt) - { - return new Inst_SOP1__S_BITSET1_B32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_BITSET1_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_BITSET1_B64(MachInst iFmt) - { - return new Inst_SOP1__S_BITSET1_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_BITSET1_B64 - - GPUStaticInst* - 
Decoder::decode_OP_SOP1__S_GETPC_B64(MachInst iFmt) - { - return new Inst_SOP1__S_GETPC_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_GETPC_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_SETPC_B64(MachInst iFmt) - { - return new Inst_SOP1__S_SETPC_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_SETPC_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_SWAPPC_B64(MachInst iFmt) - { - return new Inst_SOP1__S_SWAPPC_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_SWAPPC_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_RFE_B64(MachInst iFmt) - { - return new Inst_SOP1__S_RFE_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_RFE_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_AND_SAVEEXEC_B64(MachInst iFmt) - { - return new Inst_SOP1__S_AND_SAVEEXEC_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_AND_SAVEEXEC_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_OR_SAVEEXEC_B64(MachInst iFmt) - { - return new Inst_SOP1__S_OR_SAVEEXEC_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_OR_SAVEEXEC_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_XOR_SAVEEXEC_B64(MachInst iFmt) - { - return new Inst_SOP1__S_XOR_SAVEEXEC_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_XOR_SAVEEXEC_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_ANDN2_SAVEEXEC_B64(MachInst iFmt) - { - return new Inst_SOP1__S_ANDN2_SAVEEXEC_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_ANDN2_SAVEEXEC_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_ORN2_SAVEEXEC_B64(MachInst iFmt) - { - return new Inst_SOP1__S_ORN2_SAVEEXEC_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_ORN2_SAVEEXEC_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_NAND_SAVEEXEC_B64(MachInst iFmt) - { - return new Inst_SOP1__S_NAND_SAVEEXEC_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_NAND_SAVEEXEC_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_NOR_SAVEEXEC_B64(MachInst iFmt) - { - return new Inst_SOP1__S_NOR_SAVEEXEC_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_NOR_SAVEEXEC_B64 - - GPUStaticInst* - 
Decoder::decode_OP_SOP1__S_XNOR_SAVEEXEC_B64(MachInst iFmt) - { - return new Inst_SOP1__S_XNOR_SAVEEXEC_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_XNOR_SAVEEXEC_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_QUADMASK_B32(MachInst iFmt) - { - return new Inst_SOP1__S_QUADMASK_B32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_QUADMASK_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_QUADMASK_B64(MachInst iFmt) - { - return new Inst_SOP1__S_QUADMASK_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_QUADMASK_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_MOVRELS_B32(MachInst iFmt) - { - return new Inst_SOP1__S_MOVRELS_B32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_MOVRELS_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_MOVRELS_B64(MachInst iFmt) - { - return new Inst_SOP1__S_MOVRELS_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_MOVRELS_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_MOVRELD_B32(MachInst iFmt) - { - return new Inst_SOP1__S_MOVRELD_B32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_MOVRELD_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_MOVRELD_B64(MachInst iFmt) - { - return new Inst_SOP1__S_MOVRELD_B64(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_MOVRELD_B64 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_CBRANCH_JOIN(MachInst iFmt) - { - return new Inst_SOP1__S_CBRANCH_JOIN(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_CBRANCH_JOIN - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_ABS_I32(MachInst iFmt) - { - return new Inst_SOP1__S_ABS_I32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_ABS_I32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_MOV_FED_B32(MachInst iFmt) - { - return new Inst_SOP1__S_MOV_FED_B32(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_MOV_FED_B32 - - GPUStaticInst* - Decoder::decode_OP_SOP1__S_SET_GPR_IDX_IDX(MachInst iFmt) - { - return new Inst_SOP1__S_SET_GPR_IDX_IDX(&iFmt->iFmt_SOP1); - } // decode_OP_SOP1__S_SET_GPR_IDX_IDX - - GPUStaticInst* - Decoder::decode_OP_SOPC__S_CMP_EQ_I32(MachInst iFmt) - { - return new 
            Inst_SOPC__S_CMP_EQ_I32(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_CMP_EQ_I32

    // Scalar-compare (SOPC) decode stubs: one trivial factory per opcode,
    // each returning a newly allocated instruction object built from the
    // iFmt_SOPC view of the fetched instruction.
    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_CMP_LG_I32(MachInst iFmt)
    {
        return new Inst_SOPC__S_CMP_LG_I32(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_CMP_LG_I32

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_CMP_GT_I32(MachInst iFmt)
    {
        return new Inst_SOPC__S_CMP_GT_I32(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_CMP_GT_I32

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_CMP_GE_I32(MachInst iFmt)
    {
        return new Inst_SOPC__S_CMP_GE_I32(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_CMP_GE_I32

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_CMP_LT_I32(MachInst iFmt)
    {
        return new Inst_SOPC__S_CMP_LT_I32(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_CMP_LT_I32

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_CMP_LE_I32(MachInst iFmt)
    {
        return new Inst_SOPC__S_CMP_LE_I32(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_CMP_LE_I32

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_CMP_EQ_U32(MachInst iFmt)
    {
        return new Inst_SOPC__S_CMP_EQ_U32(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_CMP_EQ_U32

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_CMP_LG_U32(MachInst iFmt)
    {
        return new Inst_SOPC__S_CMP_LG_U32(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_CMP_LG_U32

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_CMP_GT_U32(MachInst iFmt)
    {
        return new Inst_SOPC__S_CMP_GT_U32(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_CMP_GT_U32

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_CMP_GE_U32(MachInst iFmt)
    {
        return new Inst_SOPC__S_CMP_GE_U32(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_CMP_GE_U32

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_CMP_LT_U32(MachInst iFmt)
    {
        return new Inst_SOPC__S_CMP_LT_U32(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_CMP_LT_U32

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_CMP_LE_U32(MachInst iFmt)
    {
        return new Inst_SOPC__S_CMP_LE_U32(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_CMP_LE_U32

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_BITCMP0_B32(MachInst iFmt)
    {
        return new Inst_SOPC__S_BITCMP0_B32(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_BITCMP0_B32

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_BITCMP1_B32(MachInst iFmt)
    {
        return new Inst_SOPC__S_BITCMP1_B32(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_BITCMP1_B32

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_BITCMP0_B64(MachInst iFmt)
    {
        return new Inst_SOPC__S_BITCMP0_B64(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_BITCMP0_B64

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_BITCMP1_B64(MachInst iFmt)
    {
        return new Inst_SOPC__S_BITCMP1_B64(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_BITCMP1_B64

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_SETVSKIP(MachInst iFmt)
    {
        return new Inst_SOPC__S_SETVSKIP(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_SETVSKIP

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_SET_GPR_IDX_ON(MachInst iFmt)
    {
        return new Inst_SOPC__S_SET_GPR_IDX_ON(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_SET_GPR_IDX_ON

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_CMP_EQ_U64(MachInst iFmt)
    {
        return new Inst_SOPC__S_CMP_EQ_U64(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_CMP_EQ_U64

    GPUStaticInst*
    Decoder::decode_OP_SOPC__S_CMP_LG_U64(MachInst iFmt)
    {
        return new Inst_SOPC__S_CMP_LG_U64(&iFmt->iFmt_SOPC);
    } // decode_OP_SOPC__S_CMP_LG_U64

    // Start of the SOPP (scalar program-control) decode stubs; these pass
    // the iFmt_SOPP encoding view.
    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_NOP(MachInst iFmt)
    {
        return new Inst_SOPP__S_NOP(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_NOP

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_ENDPGM(MachInst iFmt)
    {
        return new Inst_SOPP__S_ENDPGM(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_ENDPGM

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_BRANCH(MachInst iFmt)
    {
        return new Inst_SOPP__S_BRANCH(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_BRANCH

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_WAKEUP(MachInst iFmt)
    {
        return new Inst_SOPP__S_WAKEUP(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_WAKEUP

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_CBRANCH_SCC0(MachInst iFmt)
    {
        return new Inst_SOPP__S_CBRANCH_SCC0(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_CBRANCH_SCC0

    // SOPP decode stubs for conditional branches, waits, messages, and
    // debug/trace opcodes.  Each allocates the corresponding Inst_SOPP__*
    // object from the iFmt_SOPP encoding view.
    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_CBRANCH_SCC1(MachInst iFmt)
    {
        return new Inst_SOPP__S_CBRANCH_SCC1(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_CBRANCH_SCC1

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_CBRANCH_VCCZ(MachInst iFmt)
    {
        return new Inst_SOPP__S_CBRANCH_VCCZ(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_CBRANCH_VCCZ

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_CBRANCH_VCCNZ(MachInst iFmt)
    {
        return new Inst_SOPP__S_CBRANCH_VCCNZ(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_CBRANCH_VCCNZ

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_CBRANCH_EXECZ(MachInst iFmt)
    {
        return new Inst_SOPP__S_CBRANCH_EXECZ(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_CBRANCH_EXECZ

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_CBRANCH_EXECNZ(MachInst iFmt)
    {
        return new Inst_SOPP__S_CBRANCH_EXECNZ(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_CBRANCH_EXECNZ

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_BARRIER(MachInst iFmt)
    {
        return new Inst_SOPP__S_BARRIER(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_BARRIER

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_SETKILL(MachInst iFmt)
    {
        return new Inst_SOPP__S_SETKILL(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_SETKILL

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_WAITCNT(MachInst iFmt)
    {
        return new Inst_SOPP__S_WAITCNT(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_WAITCNT

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_SETHALT(MachInst iFmt)
    {
        return new Inst_SOPP__S_SETHALT(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_SETHALT

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_SLEEP(MachInst iFmt)
    {
        return new Inst_SOPP__S_SLEEP(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_SLEEP

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_SETPRIO(MachInst iFmt)
    {
        return new Inst_SOPP__S_SETPRIO(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_SETPRIO

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_SENDMSG(MachInst iFmt)
    {
        return new Inst_SOPP__S_SENDMSG(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_SENDMSG

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_SENDMSGHALT(MachInst iFmt)
    {
        return new Inst_SOPP__S_SENDMSGHALT(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_SENDMSGHALT

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_TRAP(MachInst iFmt)
    {
        return new Inst_SOPP__S_TRAP(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_TRAP

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_ICACHE_INV(MachInst iFmt)
    {
        return new Inst_SOPP__S_ICACHE_INV(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_ICACHE_INV

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_INCPERFLEVEL(MachInst iFmt)
    {
        return new Inst_SOPP__S_INCPERFLEVEL(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_INCPERFLEVEL

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_DECPERFLEVEL(MachInst iFmt)
    {
        return new Inst_SOPP__S_DECPERFLEVEL(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_DECPERFLEVEL

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_TTRACEDATA(MachInst iFmt)
    {
        return new Inst_SOPP__S_TTRACEDATA(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_TTRACEDATA

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_CBRANCH_CDBGSYS(MachInst iFmt)
    {
        return new Inst_SOPP__S_CBRANCH_CDBGSYS(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_CBRANCH_CDBGSYS

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_CBRANCH_CDBGUSER(MachInst iFmt)
    {
        return new Inst_SOPP__S_CBRANCH_CDBGUSER(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_CBRANCH_CDBGUSER

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_CBRANCH_CDBGSYS_OR_USER(MachInst iFmt)
    {
        return new Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_CBRANCH_CDBGSYS_OR_USER

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_CBRANCH_CDBGSYS_AND_USER(MachInst iFmt)
    {
        return new Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_CBRANCH_CDBGSYS_AND_USER
    // Remaining SOPP stubs, the VINTRP (parameter-interpolation) stubs, and
    // the first group of VOP1 (vector one-operand) stubs.  Each allocates
    // the matching Inst_* object from the corresponding encoding view
    // (iFmt_SOPP / iFmt_VINTRP / iFmt_VOP1).
    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_ENDPGM_SAVED(MachInst iFmt)
    {
        return new Inst_SOPP__S_ENDPGM_SAVED(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_ENDPGM_SAVED

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_SET_GPR_IDX_OFF(MachInst iFmt)
    {
        return new Inst_SOPP__S_SET_GPR_IDX_OFF(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_SET_GPR_IDX_OFF

    GPUStaticInst*
    Decoder::decode_OP_SOPP__S_SET_GPR_IDX_MODE(MachInst iFmt)
    {
        return new Inst_SOPP__S_SET_GPR_IDX_MODE(&iFmt->iFmt_SOPP);
    } // decode_OP_SOPP__S_SET_GPR_IDX_MODE

    GPUStaticInst*
    Decoder::decode_OP_VINTRP__V_INTERP_P1_F32(MachInst iFmt)
    {
        return new Inst_VINTRP__V_INTERP_P1_F32(&iFmt->iFmt_VINTRP);
    } // decode_OP_VINTRP__V_INTERP_P1_F32

    GPUStaticInst*
    Decoder::decode_OP_VINTRP__V_INTERP_P2_F32(MachInst iFmt)
    {
        return new Inst_VINTRP__V_INTERP_P2_F32(&iFmt->iFmt_VINTRP);
    } // decode_OP_VINTRP__V_INTERP_P2_F32

    GPUStaticInst*
    Decoder::decode_OP_VINTRP__V_INTERP_MOV_F32(MachInst iFmt)
    {
        return new Inst_VINTRP__V_INTERP_MOV_F32(&iFmt->iFmt_VINTRP);
    } // decode_OP_VINTRP__V_INTERP_MOV_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_NOP(MachInst iFmt)
    {
        return new Inst_VOP1__V_NOP(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_NOP

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_MOV_B32(MachInst iFmt)
    {
        return new Inst_VOP1__V_MOV_B32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_MOV_B32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_READFIRSTLANE_B32(MachInst iFmt)
    {
        return new Inst_VOP1__V_READFIRSTLANE_B32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_READFIRSTLANE_B32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_I32_F64(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_I32_F64(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_I32_F64

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_F64_I32(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_F64_I32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_F64_I32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_F32_I32(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_F32_I32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_F32_I32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_F32_U32(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_F32_U32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_F32_U32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_U32_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_U32_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_U32_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_I32_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_I32_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_I32_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_MOV_FED_B32(MachInst iFmt)
    {
        return new Inst_VOP1__V_MOV_FED_B32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_MOV_FED_B32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_F16_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_F16_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_F16_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_F32_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_F32_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_F32_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_RPI_I32_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_RPI_I32_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_RPI_I32_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_FLR_I32_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_FLR_I32_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_FLR_I32_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_OFF_F32_I4(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_OFF_F32_I4(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_OFF_F32_I4

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_F32_F64(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_F32_F64(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_F32_F64

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_F64_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_F64_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_F64_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_F32_UBYTE0(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_F32_UBYTE0(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_F32_UBYTE0

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_F32_UBYTE1(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_F32_UBYTE1(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_F32_UBYTE1

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_F32_UBYTE2(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_F32_UBYTE2(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_F32_UBYTE2

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_F32_UBYTE3(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_F32_UBYTE3(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_F32_UBYTE3

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_U32_F64(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_U32_F64(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_U32_F64

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_F64_U32(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_F64_U32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_F64_U32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_TRUNC_F64(MachInst iFmt)
    {
        return new Inst_VOP1__V_TRUNC_F64(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_TRUNC_F64

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CEIL_F64(MachInst iFmt)
    {
        return new Inst_VOP1__V_CEIL_F64(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CEIL_F64

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_RNDNE_F64(MachInst iFmt)
    {
        return new Inst_VOP1__V_RNDNE_F64(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_RNDNE_F64

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_FLOOR_F64(MachInst iFmt)
    {
        return new Inst_VOP1__V_FLOOR_F64(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_FLOOR_F64

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_FRACT_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_FRACT_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_FRACT_F32
    // VOP1 decode stubs for rounding, transcendental, bit-manipulation, and
    // F16 opcodes.  Each allocates the matching Inst_VOP1__* object from the
    // iFmt_VOP1 encoding view.
    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_TRUNC_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_TRUNC_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_TRUNC_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CEIL_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_CEIL_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CEIL_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_RNDNE_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_RNDNE_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_RNDNE_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_FLOOR_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_FLOOR_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_FLOOR_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_EXP_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_EXP_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_EXP_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_LOG_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_LOG_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_LOG_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_RCP_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_RCP_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_RCP_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_RCP_IFLAG_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_RCP_IFLAG_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_RCP_IFLAG_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_RSQ_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_RSQ_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_RSQ_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_RCP_F64(MachInst iFmt)
    {
        return new Inst_VOP1__V_RCP_F64(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_RCP_F64

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_RSQ_F64(MachInst iFmt)
    {
        return new Inst_VOP1__V_RSQ_F64(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_RSQ_F64

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_SQRT_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_SQRT_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_SQRT_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_SQRT_F64(MachInst iFmt)
    {
        return new Inst_VOP1__V_SQRT_F64(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_SQRT_F64

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_SIN_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_SIN_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_SIN_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_COS_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_COS_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_COS_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_NOT_B32(MachInst iFmt)
    {
        return new Inst_VOP1__V_NOT_B32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_NOT_B32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_BFREV_B32(MachInst iFmt)
    {
        return new Inst_VOP1__V_BFREV_B32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_BFREV_B32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_FFBH_U32(MachInst iFmt)
    {
        return new Inst_VOP1__V_FFBH_U32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_FFBH_U32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_FFBL_B32(MachInst iFmt)
    {
        return new Inst_VOP1__V_FFBL_B32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_FFBL_B32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_FFBH_I32(MachInst iFmt)
    {
        return new Inst_VOP1__V_FFBH_I32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_FFBH_I32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_FREXP_EXP_I32_F64(MachInst iFmt)
    {
        return new Inst_VOP1__V_FREXP_EXP_I32_F64(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_FREXP_EXP_I32_F64

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_FREXP_MANT_F64(MachInst iFmt)
    {
        return new Inst_VOP1__V_FREXP_MANT_F64(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_FREXP_MANT_F64

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_FRACT_F64(MachInst iFmt)
    {
        return new Inst_VOP1__V_FRACT_F64(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_FRACT_F64

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_FREXP_EXP_I32_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_FREXP_EXP_I32_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_FREXP_EXP_I32_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_FREXP_MANT_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_FREXP_MANT_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_FREXP_MANT_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CLREXCP(MachInst iFmt)
    {
        return new Inst_VOP1__V_CLREXCP(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CLREXCP

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_F16_U16(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_F16_U16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_F16_U16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_F16_I16(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_F16_I16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_F16_I16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_U16_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_U16_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_U16_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CVT_I16_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_CVT_I16_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CVT_I16_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_RCP_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_RCP_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_RCP_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_SQRT_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_SQRT_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_SQRT_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_RSQ_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_RSQ_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_RSQ_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_LOG_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_LOG_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_LOG_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_EXP_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_EXP_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_EXP_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_FREXP_MANT_F16(MachInst iFmt)
    {
        return new
            Inst_VOP1__V_FREXP_MANT_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_FREXP_MANT_F16

    // Final VOP1 stubs, then the start of the VOPC (vector-compare) stubs.
    // VOPC stubs allocate the matching Inst_VOPC__* object from the
    // iFmt_VOPC encoding view.
    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_FREXP_EXP_I16_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_FREXP_EXP_I16_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_FREXP_EXP_I16_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_FLOOR_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_FLOOR_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_FLOOR_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_CEIL_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_CEIL_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_CEIL_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_TRUNC_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_TRUNC_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_TRUNC_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_RNDNE_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_RNDNE_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_RNDNE_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_FRACT_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_FRACT_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_FRACT_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_SIN_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_SIN_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_SIN_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_COS_F16(MachInst iFmt)
    {
        return new Inst_VOP1__V_COS_F16(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_COS_F16

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_EXP_LEGACY_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_EXP_LEGACY_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_EXP_LEGACY_F32

    GPUStaticInst*
    Decoder::decode_OP_VOP1__V_LOG_LEGACY_F32(MachInst iFmt)
    {
        return new Inst_VOP1__V_LOG_LEGACY_F32(&iFmt->iFmt_VOP1);
    } // decode_OP_VOP1__V_LOG_LEGACY_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_CLASS_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_CLASS_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_CLASS_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_CLASS_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_CLASS_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_CLASS_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_CLASS_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_CLASS_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_CLASS_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_CLASS_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_CLASS_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_CLASS_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_CLASS_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_CLASS_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_CLASS_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_CLASS_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_CLASS_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_CLASS_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_F_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_F_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_F_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_LT_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_LT_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_LT_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_EQ_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_EQ_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_EQ_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_LE_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_LE_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_LE_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_GT_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_GT_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_GT_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_LG_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_LG_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_LG_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_GE_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_GE_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_GE_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_O_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_O_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_O_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_U_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_U_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_U_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NGE_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NGE_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NGE_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NLG_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NLG_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NLG_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NGT_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NGT_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NGT_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NLE_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NLE_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NLE_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NEQ_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NEQ_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NEQ_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NLT_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NLT_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NLT_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_TRU_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_TRU_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_TRU_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_F_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_F_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_F_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_LT_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_LT_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_LT_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_EQ_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_EQ_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_EQ_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_LE_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_LE_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_LE_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_GT_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_GT_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_GT_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_LG_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_LG_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_LG_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_GE_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_GE_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_GE_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_O_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_O_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_O_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_U_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_U_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_U_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_NGE_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_NGE_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_NGE_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_NLG_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_NLG_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_NLG_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_NGT_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_NGT_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_NGT_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_NLE_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_NLE_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_NLE_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_NEQ_F16(MachInst iFmt)
    {
        return new
            Inst_VOPC__V_CMPX_NEQ_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_NEQ_F16

    // VOPC decode stubs for the F32 and F64 compare opcodes.  Each allocates
    // the matching Inst_VOPC__* object from the iFmt_VOPC encoding view.
    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_NLT_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_NLT_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_NLT_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_TRU_F16(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_TRU_F16(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_TRU_F16

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_F_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_F_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_F_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_LT_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_LT_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_LT_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_EQ_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_EQ_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_EQ_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_LE_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_LE_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_LE_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_GT_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_GT_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_GT_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_LG_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_LG_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_LG_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_GE_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_GE_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_GE_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_O_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_O_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_O_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_U_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_U_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_U_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NGE_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NGE_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NGE_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NLG_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NLG_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NLG_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NGT_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NGT_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NGT_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NLE_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NLE_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NLE_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NEQ_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NEQ_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NEQ_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NLT_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NLT_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NLT_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_TRU_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_TRU_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_TRU_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_F_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_F_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_F_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_LT_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_LT_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_LT_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_EQ_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_EQ_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_EQ_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_LE_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_LE_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_LE_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_GT_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_GT_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_GT_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_LG_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_LG_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_LG_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_GE_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_GE_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_GE_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_O_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_O_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_O_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_U_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_U_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_U_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_NGE_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_NGE_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_NGE_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_NLG_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_NLG_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_NLG_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_NGT_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_NGT_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_NGT_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_NLE_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_NLE_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_NLE_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_NEQ_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_NEQ_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_NEQ_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_NLT_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_NLT_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_NLT_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_TRU_F32(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_TRU_F32(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_TRU_F32

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_F_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_F_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_F_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_LT_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_LT_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_LT_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_EQ_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_EQ_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_EQ_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_LE_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_LE_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_LE_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_GT_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_GT_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_GT_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_LG_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_LG_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_LG_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_GE_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_GE_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_GE_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_O_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_O_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_O_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_U_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_U_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_U_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NGE_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NGE_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NGE_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NLG_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NLG_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NLG_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NGT_F64(MachInst iFmt)
    {
        return new
            Inst_VOPC__V_CMP_NGT_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NGT_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NLE_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NLE_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NLE_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NEQ_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NEQ_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NEQ_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_NLT_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_NLT_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_NLT_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMP_TRU_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMP_TRU_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMP_TRU_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_F_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_F_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_F_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_LT_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_LT_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_LT_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_EQ_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_EQ_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_EQ_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_LE_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_LE_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_LE_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_GT_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_GT_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_GT_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_LG_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_LG_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_LG_F64

    GPUStaticInst*
    Decoder::decode_OP_VOPC__V_CMPX_GE_F64(MachInst iFmt)
    {
        return new Inst_VOPC__V_CMPX_GE_F64(&iFmt->iFmt_VOPC);
    } // decode_OP_VOPC__V_CMPX_GE_F64

    GPUStaticInst*
- Decoder::decode_OP_VOPC__V_CMPX_O_F64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_O_F64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_O_F64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_U_F64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_U_F64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_U_F64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_NGE_F64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_NGE_F64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_NGE_F64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_NLG_F64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_NLG_F64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_NLG_F64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_NGT_F64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_NGT_F64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_NGT_F64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_NLE_F64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_NLE_F64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_NLE_F64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_NEQ_F64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_NEQ_F64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_NEQ_F64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_NLT_F64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_NLT_F64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_NLT_F64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_TRU_F64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_TRU_F64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_TRU_F64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_F_I16(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_F_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_F_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_LT_I16(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_LT_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_LT_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_EQ_I16(MachInst iFmt) - { - return new 
Inst_VOPC__V_CMP_EQ_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_EQ_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_LE_I16(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_LE_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_LE_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_GT_I16(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_GT_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_GT_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_NE_I16(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_NE_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_NE_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_GE_I16(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_GE_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_GE_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_T_I16(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_T_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_T_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_F_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_F_U16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_F_U16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_LT_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_LT_U16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_LT_U16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_EQ_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_EQ_U16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_EQ_U16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_LE_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_LE_U16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_LE_U16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_GT_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_GT_U16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_GT_U16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_NE_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_NE_U16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_NE_U16 - - GPUStaticInst* - 
Decoder::decode_OP_VOPC__V_CMP_GE_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_GE_U16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_GE_U16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_T_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_T_U16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_T_U16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_F_I16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_F_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_F_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_LT_I16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_LT_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_LT_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_EQ_I16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_EQ_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_EQ_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_LE_I16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_LE_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_LE_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_GT_I16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_GT_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_GT_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_NE_I16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_NE_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_NE_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_GE_I16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_GE_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_GE_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_T_I16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_T_I16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_T_I16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_F_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_F_U16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_F_U16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_LT_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_LT_U16(&iFmt->iFmt_VOPC); 
- } // decode_OP_VOPC__V_CMPX_LT_U16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_EQ_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_EQ_U16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_EQ_U16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_LE_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_LE_U16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_LE_U16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_GT_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_GT_U16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_GT_U16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_NE_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_NE_U16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_NE_U16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_GE_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_GE_U16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_GE_U16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_T_U16(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_T_U16(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_T_U16 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_F_I32(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_F_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_F_I32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_LT_I32(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_LT_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_LT_I32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_EQ_I32(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_EQ_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_EQ_I32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_LE_I32(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_LE_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_LE_I32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_GT_I32(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_GT_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_GT_I32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_NE_I32(MachInst iFmt) - { - 
return new Inst_VOPC__V_CMP_NE_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_NE_I32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_GE_I32(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_GE_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_GE_I32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_T_I32(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_T_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_T_I32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_F_U32(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_F_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_F_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_LT_U32(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_LT_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_LT_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_EQ_U32(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_EQ_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_EQ_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_LE_U32(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_LE_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_LE_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_GT_U32(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_GT_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_GT_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_NE_U32(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_NE_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_NE_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_GE_U32(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_GE_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_GE_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_T_U32(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_T_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_T_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_F_I32(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_F_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_F_I32 - - GPUStaticInst* - 
Decoder::decode_OP_VOPC__V_CMPX_LT_I32(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_LT_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_LT_I32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_EQ_I32(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_EQ_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_EQ_I32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_LE_I32(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_LE_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_LE_I32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_GT_I32(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_GT_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_GT_I32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_NE_I32(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_NE_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_NE_I32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_GE_I32(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_GE_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_GE_I32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_T_I32(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_T_I32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_T_I32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_F_U32(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_F_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_F_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_LT_U32(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_LT_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_LT_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_EQ_U32(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_EQ_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_EQ_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_LE_U32(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_LE_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_LE_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_GT_U32(MachInst iFmt) - { - return new 
Inst_VOPC__V_CMPX_GT_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_GT_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_NE_U32(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_NE_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_NE_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_GE_U32(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_GE_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_GE_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_T_U32(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_T_U32(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_T_U32 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_F_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_F_I64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_F_I64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_LT_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_LT_I64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_LT_I64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_EQ_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_EQ_I64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_EQ_I64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_LE_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_LE_I64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_LE_I64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_GT_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_GT_I64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_GT_I64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_NE_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_NE_I64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_NE_I64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_GE_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_GE_I64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_GE_I64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_T_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_T_I64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_T_I64 - - GPUStaticInst* - 
Decoder::decode_OP_VOPC__V_CMP_F_U64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_F_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_F_U64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_LT_U64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_LT_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_LT_U64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_EQ_U64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_EQ_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_EQ_U64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_LE_U64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_LE_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_LE_U64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_GT_U64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_GT_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_GT_U64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_NE_U64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_NE_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_NE_U64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_GE_U64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_GE_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_GE_U64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMP_T_U64(MachInst iFmt) - { - return new Inst_VOPC__V_CMP_T_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMP_T_U64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_F_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_F_I64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_F_I64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_LT_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_LT_I64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_LT_I64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_EQ_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_EQ_I64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_EQ_I64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_LE_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_LE_I64(&iFmt->iFmt_VOPC); - } // 
decode_OP_VOPC__V_CMPX_LE_I64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_GT_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_GT_I64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_GT_I64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_NE_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_NE_I64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_NE_I64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_GE_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_GE_I64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_GE_I64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_T_I64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_T_I64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_T_I64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_F_U64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_F_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_F_U64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_LT_U64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_LT_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_LT_U64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_EQ_U64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_EQ_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_EQ_U64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_LE_U64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_LE_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_LE_U64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_GT_U64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_GT_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_GT_U64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_NE_U64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_NE_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_NE_U64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_GE_U64(MachInst iFmt) - { - return new Inst_VOPC__V_CMPX_GE_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_GE_U64 - - GPUStaticInst* - Decoder::decode_OP_VOPC__V_CMPX_T_U64(MachInst 
iFmt) - { - return new Inst_VOPC__V_CMPX_T_U64(&iFmt->iFmt_VOPC); - } // decode_OP_VOPC__V_CMPX_T_U64 - - GPUStaticInst* - Decoder::decode_invalid(MachInst iFmt) - { - fatal("Invalid opcode encountered: %#x\n", iFmt->imm_u32); - - return nullptr; - } -} // namespace Gcn3ISA -} // namespace gem5 diff --git a/src/arch/amdgpu/gcn3/gpu_decoder.hh b/src/arch/amdgpu/gcn3/gpu_decoder.hh deleted file mode 100644 index 1dadae6f30..0000000000 --- a/src/arch/amdgpu/gcn3/gpu_decoder.hh +++ /dev/null @@ -1,1676 +0,0 @@ -/* - * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __ARCH_GCN3_DECODER_HH__ -#define __ARCH_GCN3_DECODER_HH__ - -#include -#include - -#include "arch/amdgpu/gcn3/gpu_types.hh" - -namespace gem5 -{ - -class GPUStaticInst; - -namespace Gcn3ISA -{ - class Decoder; - union InstFormat; - - using IsaDecodeMethod = GPUStaticInst*(Decoder::*)(MachInst); - - class Decoder - { - public: - Decoder(); - ~Decoder(); - - GPUStaticInst* decode(MachInst mach_inst); - - private: - static IsaDecodeMethod tableDecodePrimary[512]; - static IsaDecodeMethod tableSubDecode_OPU_VOP3[768]; - static IsaDecodeMethod tableSubDecode_OP_DS[256]; - static IsaDecodeMethod tableSubDecode_OP_FLAT[128]; - static IsaDecodeMethod tableSubDecode_OP_MIMG[128]; - static IsaDecodeMethod tableSubDecode_OP_MTBUF[16]; - static IsaDecodeMethod tableSubDecode_OP_MUBUF[128]; - static IsaDecodeMethod tableSubDecode_OP_SMEM[64]; - static IsaDecodeMethod tableSubDecode_OP_SOP1[256]; - static IsaDecodeMethod tableSubDecode_OP_SOPC[128]; - static IsaDecodeMethod tableSubDecode_OP_SOPP[128]; - static IsaDecodeMethod tableSubDecode_OP_VINTRP[4]; - static IsaDecodeMethod tableSubDecode_OP_VOP1[256]; - static IsaDecodeMethod tableSubDecode_OP_VOPC[256]; - - GPUStaticInst* decode_OPU_VOP3__V_ADDC_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_ADD_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_ADD_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_ADD_F64(MachInst); - GPUStaticInst* 
decode_OPU_VOP3__V_ADD_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_ADD_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_ALIGNBIT_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_ALIGNBYTE_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_AND_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_ASHRREV_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_ASHRREV_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_ASHRREV_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_BCNT_U32_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_BFE_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_BFE_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_BFI_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_BFM_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_BFREV_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CEIL_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CEIL_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CEIL_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CLREXCP(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_CLASS_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_CLASS_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_CLASS_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_EQ_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_EQ_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_EQ_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_EQ_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_EQ_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_EQ_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_EQ_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_EQ_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_EQ_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_F_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_F_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_F_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_F_I16(MachInst); - GPUStaticInst* 
decode_OPU_VOP3__V_CMPX_F_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_F_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_F_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_F_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_F_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GE_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GE_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GE_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GE_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GE_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GE_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GE_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GE_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GE_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GT_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GT_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GT_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GT_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GT_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GT_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GT_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GT_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_GT_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LE_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LE_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LE_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LE_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LE_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LE_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LE_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LE_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LE_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LG_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LG_F32(MachInst); - 
GPUStaticInst* decode_OPU_VOP3__V_CMPX_LG_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LT_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LT_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LT_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LT_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LT_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LT_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LT_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LT_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_LT_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NEQ_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NEQ_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NEQ_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NE_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NE_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NE_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NE_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NE_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NE_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NGE_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NGE_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NGE_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NGT_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NGT_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NGT_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NLE_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NLE_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NLE_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NLG_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NLG_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NLG_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NLT_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_NLT_F32(MachInst); - GPUStaticInst* 
decode_OPU_VOP3__V_CMPX_NLT_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_O_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_O_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_O_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_TRU_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_TRU_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_TRU_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_T_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_T_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_T_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_T_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_T_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_T_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_U_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_U_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMPX_U_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_CLASS_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_CLASS_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_CLASS_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_EQ_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_EQ_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_EQ_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_EQ_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_EQ_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_EQ_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_EQ_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_EQ_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_EQ_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_F_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_F_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_F_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_F_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_F_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_F_I64(MachInst); - GPUStaticInst* 
decode_OPU_VOP3__V_CMP_F_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_F_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_F_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GE_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GE_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GE_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GE_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GE_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GE_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GE_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GE_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GE_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GT_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GT_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GT_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GT_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GT_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GT_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GT_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GT_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_GT_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LE_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LE_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LE_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LE_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LE_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LE_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LE_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LE_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LE_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LG_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LG_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LG_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LT_F16(MachInst); - GPUStaticInst* 
decode_OPU_VOP3__V_CMP_LT_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LT_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LT_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LT_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LT_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LT_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LT_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_LT_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NEQ_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NEQ_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NEQ_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NE_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NE_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NE_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NE_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NE_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NE_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NGE_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NGE_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NGE_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NGT_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NGT_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NGT_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NLE_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NLE_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NLE_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NLG_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NLG_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NLG_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NLT_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NLT_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_NLT_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_O_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_O_F32(MachInst); - GPUStaticInst* 
decode_OPU_VOP3__V_CMP_O_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_TRU_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_TRU_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_TRU_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_T_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_T_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_T_I64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_T_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_T_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_T_U64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_U_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_U_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CMP_U_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CNDMASK_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_COS_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_COS_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CUBEID_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CUBEMA_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CUBESC_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CUBETC_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_F16_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_F16_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_F16_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_F32_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_F32_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_F32_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_F32_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_F32_UBYTE0(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_F32_UBYTE1(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_F32_UBYTE2(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_F32_UBYTE3(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_F64_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_F64_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_F64_U32(MachInst); - GPUStaticInst* 
decode_OPU_VOP3__V_CVT_FLR_I32_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_I16_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_I32_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_I32_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_OFF_F32_I4(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_PKACCUM_U8_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_PKNORM_I16_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_PKNORM_U16_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_PKRTZ_F16_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_PK_I16_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_PK_U16_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_PK_U8_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_RPI_I32_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_U16_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_U32_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_CVT_U32_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_DIV_FIXUP_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_DIV_FIXUP_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_DIV_FIXUP_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_DIV_FMAS_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_DIV_FMAS_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_DIV_SCALE_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_DIV_SCALE_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_EXP_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_EXP_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_EXP_LEGACY_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FFBH_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FFBH_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FFBL_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FLOOR_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FLOOR_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FLOOR_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FMA_F16(MachInst); - GPUStaticInst* 
decode_OPU_VOP3__V_FMA_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FMA_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FRACT_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FRACT_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FRACT_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FREXP_EXP_I16_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FREXP_EXP_I32_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FREXP_EXP_I32_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FREXP_MANT_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FREXP_MANT_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_FREXP_MANT_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_INTERP_MOV_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_INTERP_P1LL_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_INTERP_P1LV_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_INTERP_P1_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_INTERP_P2_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_INTERP_P2_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_LDEXP_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_LDEXP_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_LDEXP_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_LERP_U8(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_LOG_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_LOG_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_LOG_LEGACY_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_LSHLREV_B16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_LSHLREV_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_LSHLREV_B64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_LSHRREV_B16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_LSHRREV_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_LSHRREV_B64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAC_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAC_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAD_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAD_F32(MachInst); - 
GPUStaticInst* decode_OPU_VOP3__V_MAD_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAD_I32_I24(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAD_I64_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAD_LEGACY_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAD_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAD_U32_U24(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAD_U64_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAX3_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAX3_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAX3_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAX_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAX_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAX_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAX_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAX_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAX_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MAX_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MBCNT_HI_U32_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MBCNT_LO_U32_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MED3_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MED3_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MED3_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MIN3_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MIN3_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MIN3_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MIN_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MIN_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MIN_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MIN_I16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MIN_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MIN_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MIN_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MOV_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MOV_FED_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MQSAD_PK_U16_U8(MachInst); - 
GPUStaticInst* decode_OPU_VOP3__V_MQSAD_U32_U8(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MSAD_U8(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MUL_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MUL_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MUL_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MUL_HI_I32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MUL_HI_I32_I24(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MUL_HI_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MUL_HI_U32_U24(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MUL_I32_I24(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MUL_LEGACY_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MUL_LO_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MUL_LO_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_MUL_U32_U24(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_NOP(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_NOT_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_OR_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_PERM_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_QSAD_PK_U16_U8(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_RCP_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_RCP_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_RCP_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_RCP_IFLAG_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_READLANE_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_RNDNE_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_RNDNE_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_RNDNE_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_RSQ_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_RSQ_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_RSQ_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SAD_HI_U8(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SAD_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SAD_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SAD_U8(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SIN_F16(MachInst); - 
GPUStaticInst* decode_OPU_VOP3__V_SIN_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SQRT_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SQRT_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SQRT_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SUBBREV_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SUBB_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SUBREV_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SUBREV_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SUBREV_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SUBREV_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SUB_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SUB_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SUB_U16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_SUB_U32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_TRIG_PREOP_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_TRUNC_F16(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_TRUNC_F32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_TRUNC_F64(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_WRITELANE_B32(MachInst); - GPUStaticInst* decode_OPU_VOP3__V_XOR_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_ADD_F32(MachInst); - GPUStaticInst* decode_OP_DS__DS_ADD_RTN_F32(MachInst); - GPUStaticInst* decode_OP_DS__DS_ADD_RTN_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_ADD_RTN_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_ADD_SRC2_F32(MachInst); - GPUStaticInst* decode_OP_DS__DS_ADD_SRC2_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_ADD_SRC2_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_ADD_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_ADD_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_AND_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_AND_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_AND_RTN_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_AND_RTN_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_AND_SRC2_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_AND_SRC2_B64(MachInst); - GPUStaticInst* 
decode_OP_DS__DS_APPEND(MachInst); - GPUStaticInst* decode_OP_DS__DS_BPERMUTE_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_CMPST_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_CMPST_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_CMPST_F32(MachInst); - GPUStaticInst* decode_OP_DS__DS_CMPST_F64(MachInst); - GPUStaticInst* decode_OP_DS__DS_CMPST_RTN_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_CMPST_RTN_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_CMPST_RTN_F32(MachInst); - GPUStaticInst* decode_OP_DS__DS_CMPST_RTN_F64(MachInst); - GPUStaticInst* decode_OP_DS__DS_CONDXCHG32_RTN_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_CONSUME(MachInst); - GPUStaticInst* decode_OP_DS__DS_DEC_RTN_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_DEC_RTN_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_DEC_SRC2_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_DEC_SRC2_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_DEC_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_DEC_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_GWS_BARRIER(MachInst); - GPUStaticInst* decode_OP_DS__DS_GWS_INIT(MachInst); - GPUStaticInst* decode_OP_DS__DS_GWS_SEMA_BR(MachInst); - GPUStaticInst* decode_OP_DS__DS_GWS_SEMA_P(MachInst); - GPUStaticInst* decode_OP_DS__DS_GWS_SEMA_RELEASE_ALL(MachInst); - GPUStaticInst* decode_OP_DS__DS_GWS_SEMA_V(MachInst); - GPUStaticInst* decode_OP_DS__DS_INC_RTN_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_INC_RTN_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_INC_SRC2_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_INC_SRC2_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_INC_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_INC_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_F32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_F64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_I32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_I64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_RTN_F32(MachInst); - GPUStaticInst* 
decode_OP_DS__DS_MAX_RTN_F64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_RTN_I32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_RTN_I64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_RTN_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_RTN_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_SRC2_F32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_SRC2_F64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_SRC2_I32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_SRC2_I64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_SRC2_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_SRC2_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MAX_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_F32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_F64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_I32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_I64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_RTN_F32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_RTN_F64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_RTN_I32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_RTN_I64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_RTN_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_RTN_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_SRC2_F32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_SRC2_F64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_SRC2_I32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_SRC2_I64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_SRC2_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_SRC2_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MIN_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MSKOR_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MSKOR_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_MSKOR_RTN_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_MSKOR_RTN_B64(MachInst); - GPUStaticInst* 
decode_OP_DS__DS_NOP(MachInst); - GPUStaticInst* decode_OP_DS__DS_ORDERED_COUNT(MachInst); - GPUStaticInst* decode_OP_DS__DS_OR_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_OR_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_OR_RTN_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_OR_RTN_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_OR_SRC2_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_OR_SRC2_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_PERMUTE_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_READ2ST64_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_READ2ST64_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_READ2_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_READ2_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_READ_B128(MachInst); - GPUStaticInst* decode_OP_DS__DS_READ_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_READ_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_READ_B96(MachInst); - GPUStaticInst* decode_OP_DS__DS_READ_I16(MachInst); - GPUStaticInst* decode_OP_DS__DS_READ_I8(MachInst); - GPUStaticInst* decode_OP_DS__DS_READ_U16(MachInst); - GPUStaticInst* decode_OP_DS__DS_READ_U8(MachInst); - GPUStaticInst* decode_OP_DS__DS_RSUB_RTN_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_RSUB_RTN_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_RSUB_SRC2_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_RSUB_SRC2_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_RSUB_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_RSUB_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_SUB_RTN_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_SUB_RTN_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_SUB_SRC2_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_SUB_SRC2_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_SUB_U32(MachInst); - GPUStaticInst* decode_OP_DS__DS_SUB_U64(MachInst); - GPUStaticInst* decode_OP_DS__DS_SWIZZLE_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRAP_RTN_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRITE2ST64_B32(MachInst); - 
GPUStaticInst* decode_OP_DS__DS_WRITE2ST64_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRITE2_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRITE2_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRITE_B128(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRITE_B16(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRITE_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRITE_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRITE_B8(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRITE_B96(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRITE_SRC2_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRITE_SRC2_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRXCHG2ST64_RTN_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRXCHG2ST64_RTN_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRXCHG2_RTN_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRXCHG2_RTN_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRXCHG_RTN_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_WRXCHG_RTN_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_XOR_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_XOR_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_XOR_RTN_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_XOR_RTN_B64(MachInst); - GPUStaticInst* decode_OP_DS__DS_XOR_SRC2_B32(MachInst); - GPUStaticInst* decode_OP_DS__DS_XOR_SRC2_B64(MachInst); - GPUStaticInst* decode_OP_EXP(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_ADD(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_ADD_X2(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_AND(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_AND_X2(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_CMPSWAP(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_CMPSWAP_X2(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_DEC(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_DEC_X2(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_INC(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_INC_X2(MachInst); - 
GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_OR(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_OR_X2(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_SMAX(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_SMAX_X2(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_SMIN(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_SMIN_X2(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_SUB(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_SUB_X2(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_SWAP(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_SWAP_X2(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_UMAX(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_UMAX_X2(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_UMIN(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_UMIN_X2(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_XOR(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_XOR_X2(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_LOAD_DWORD(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_LOAD_DWORDX2(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_LOAD_DWORDX3(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_LOAD_DWORDX4(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_LOAD_SBYTE(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_LOAD_SSHORT(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_LOAD_UBYTE(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_LOAD_USHORT(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_STORE_BYTE(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_STORE_DWORD(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_STORE_DWORDX2(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_STORE_DWORDX3(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_STORE_DWORDX4(MachInst); - GPUStaticInst* decode_OP_FLAT__FLAT_STORE_SHORT(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_ATOMIC_ADD(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_ATOMIC_AND(MachInst); - GPUStaticInst* 
decode_OP_MIMG__IMAGE_ATOMIC_CMPSWAP(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_ATOMIC_DEC(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_ATOMIC_INC(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_ATOMIC_OR(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_ATOMIC_SMAX(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_ATOMIC_SMIN(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_ATOMIC_SUB(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_ATOMIC_SWAP(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_ATOMIC_UMAX(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_ATOMIC_UMIN(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_ATOMIC_XOR(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_B(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_B_CL(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_B_CL_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_B_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_C(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_CL(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_CL_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_C_B(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_C_B_CL(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_C_B_CL_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_C_B_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_C_CL(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_C_CL_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_C_L(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_C_LZ(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_C_LZ_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_C_L_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_C_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_L(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_LZ(MachInst); - GPUStaticInst* 
decode_OP_MIMG__IMAGE_GATHER4_LZ_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_L_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GATHER4_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GET_LOD(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_GET_RESINFO(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_LOAD(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_LOAD_MIP(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_LOAD_MIP_PCK(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_LOAD_MIP_PCK_SGN(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_LOAD_PCK(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_LOAD_PCK_SGN(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_B(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_B_CL(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_B_CL_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_B_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_CD(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_CD_CL(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_CD_CL_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_CD_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_CL(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_CL_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_B(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_B_CL(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_B_CL_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_B_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_CD(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_CD_CL(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_CD_CL_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_CD_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_CL(MachInst); - GPUStaticInst* 
decode_OP_MIMG__IMAGE_SAMPLE_C_CL_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_D(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_D_CL(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_D_CL_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_D_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_L(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_LZ(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_LZ_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_L_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_C_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_D(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_D_CL(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_D_CL_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_D_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_L(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_LZ(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_LZ_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_L_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_SAMPLE_O(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_STORE(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_STORE_MIP(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_STORE_MIP_PCK(MachInst); - GPUStaticInst* decode_OP_MIMG__IMAGE_STORE_PCK(MachInst); - GPUStaticInst* decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(MachInst); - GPUStaticInst* decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(MachInst); - GPUStaticInst* decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ(MachInst); - GPUStaticInst* decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW(MachInst); - GPUStaticInst* decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_X(MachInst); - GPUStaticInst* decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_XY(MachInst); - GPUStaticInst* decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(MachInst); - GPUStaticInst* decode_OP_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(MachInst); - GPUStaticInst* 
decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_X(MachInst); - GPUStaticInst* decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(MachInst); - GPUStaticInst* decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(MachInst); - GPUStaticInst* - decode_OP_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(MachInst); - GPUStaticInst* decode_OP_MTBUF__TBUFFER_STORE_FORMAT_X(MachInst); - GPUStaticInst* decode_OP_MTBUF__TBUFFER_STORE_FORMAT_XY(MachInst); - GPUStaticInst* decode_OP_MTBUF__TBUFFER_STORE_FORMAT_XYZ(MachInst); - GPUStaticInst* decode_OP_MTBUF__TBUFFER_STORE_FORMAT_XYZW(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_ADD(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_ADD_X2(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_AND(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_AND_X2(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_CMPSWAP(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_DEC(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_DEC_X2(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_INC(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_INC_X2(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_OR(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_OR_X2(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_SMAX(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_SMAX_X2(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_SMIN(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_SMIN_X2(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_SUB(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_SUB_X2(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_SWAP(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_SWAP_X2(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_UMAX(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_UMAX_X2(MachInst); - 
GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_UMIN(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_UMIN_X2(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_XOR(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_ATOMIC_XOR_X2(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_DWORD(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_DWORDX2(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_DWORDX3(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_DWORDX4(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_X(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_FORMAT_X(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_FORMAT_XY(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_FORMAT_XYZ(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_FORMAT_XYZW(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_SBYTE(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_SSHORT(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_UBYTE(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_LOAD_USHORT(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_STORE_BYTE(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_STORE_DWORD(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_STORE_DWORDX2(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_STORE_DWORDX3(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_STORE_DWORDX4(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_X(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_XY(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(MachInst); - GPUStaticInst* 
decode_OP_MUBUF__BUFFER_STORE_FORMAT_X(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_STORE_FORMAT_XY(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_STORE_FORMAT_XYZ(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_STORE_FORMAT_XYZW(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_STORE_LDS_DWORD(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_STORE_SHORT(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_WBINVL1(MachInst); - GPUStaticInst* decode_OP_MUBUF__BUFFER_WBINVL1_VOL(MachInst); - GPUStaticInst* decode_OP_SMEM__S_ATC_PROBE(MachInst); - GPUStaticInst* decode_OP_SMEM__S_ATC_PROBE_BUFFER(MachInst); - GPUStaticInst* decode_OP_SMEM__S_BUFFER_LOAD_DWORD(MachInst); - GPUStaticInst* decode_OP_SMEM__S_BUFFER_LOAD_DWORDX16(MachInst); - GPUStaticInst* decode_OP_SMEM__S_BUFFER_LOAD_DWORDX2(MachInst); - GPUStaticInst* decode_OP_SMEM__S_BUFFER_LOAD_DWORDX4(MachInst); - GPUStaticInst* decode_OP_SMEM__S_BUFFER_LOAD_DWORDX8(MachInst); - GPUStaticInst* decode_OP_SMEM__S_BUFFER_STORE_DWORD(MachInst); - GPUStaticInst* decode_OP_SMEM__S_BUFFER_STORE_DWORDX2(MachInst); - GPUStaticInst* decode_OP_SMEM__S_BUFFER_STORE_DWORDX4(MachInst); - GPUStaticInst* decode_OP_SMEM__S_DCACHE_INV(MachInst); - GPUStaticInst* decode_OP_SMEM__S_DCACHE_INV_VOL(MachInst); - GPUStaticInst* decode_OP_SMEM__S_DCACHE_WB(MachInst); - GPUStaticInst* decode_OP_SMEM__S_DCACHE_WB_VOL(MachInst); - GPUStaticInst* decode_OP_SMEM__S_LOAD_DWORD(MachInst); - GPUStaticInst* decode_OP_SMEM__S_LOAD_DWORDX16(MachInst); - GPUStaticInst* decode_OP_SMEM__S_LOAD_DWORDX2(MachInst); - GPUStaticInst* decode_OP_SMEM__S_LOAD_DWORDX4(MachInst); - GPUStaticInst* decode_OP_SMEM__S_LOAD_DWORDX8(MachInst); - GPUStaticInst* decode_OP_SMEM__S_MEMREALTIME(MachInst); - GPUStaticInst* decode_OP_SMEM__S_MEMTIME(MachInst); - GPUStaticInst* decode_OP_SMEM__S_STORE_DWORD(MachInst); - GPUStaticInst* decode_OP_SMEM__S_STORE_DWORDX2(MachInst); - GPUStaticInst* decode_OP_SMEM__S_STORE_DWORDX4(MachInst); - GPUStaticInst* 
decode_OP_SOP1__S_ABS_I32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_ANDN2_SAVEEXEC_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_AND_SAVEEXEC_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_BCNT0_I32_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_BCNT0_I32_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_BCNT1_I32_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_BCNT1_I32_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_BITSET0_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_BITSET0_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_BITSET1_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_BITSET1_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_BREV_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_BREV_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_CBRANCH_JOIN(MachInst); - GPUStaticInst* decode_OP_SOP1__S_CMOV_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_CMOV_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_FF0_I32_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_FF0_I32_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_FF1_I32_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_FF1_I32_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_FLBIT_I32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_FLBIT_I32_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_FLBIT_I32_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_FLBIT_I32_I64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_GETPC_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_MOVRELD_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_MOVRELD_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_MOVRELS_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_MOVRELS_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_MOV_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_MOV_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_MOV_FED_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_NAND_SAVEEXEC_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_NOR_SAVEEXEC_B64(MachInst); - GPUStaticInst* 
decode_OP_SOP1__S_NOT_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_NOT_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_ORN2_SAVEEXEC_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_OR_SAVEEXEC_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_QUADMASK_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_QUADMASK_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_RFE_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_SETPC_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_SET_GPR_IDX_IDX(MachInst); - GPUStaticInst* decode_OP_SOP1__S_SEXT_I32_I16(MachInst); - GPUStaticInst* decode_OP_SOP1__S_SEXT_I32_I8(MachInst); - GPUStaticInst* decode_OP_SOP1__S_SWAPPC_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_WQM_B32(MachInst); - GPUStaticInst* decode_OP_SOP1__S_WQM_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_XNOR_SAVEEXEC_B64(MachInst); - GPUStaticInst* decode_OP_SOP1__S_XOR_SAVEEXEC_B64(MachInst); - GPUStaticInst* decode_OP_SOP2__S_ABSDIFF_I32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_ADDC_U32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_ADD_I32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_ADD_U32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_ANDN2_B32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_ANDN2_B64(MachInst); - GPUStaticInst* decode_OP_SOP2__S_AND_B32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_AND_B64(MachInst); - GPUStaticInst* decode_OP_SOP2__S_ASHR_I32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_ASHR_I64(MachInst); - GPUStaticInst* decode_OP_SOP2__S_BFE_I32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_BFE_I64(MachInst); - GPUStaticInst* decode_OP_SOP2__S_BFE_U32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_BFE_U64(MachInst); - GPUStaticInst* decode_OP_SOP2__S_BFM_B32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_BFM_B64(MachInst); - GPUStaticInst* decode_OP_SOP2__S_CBRANCH_G_FORK(MachInst); - GPUStaticInst* decode_OP_SOP2__S_CSELECT_B32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_CSELECT_B64(MachInst); - GPUStaticInst* 
decode_OP_SOP2__S_LSHL_B32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_LSHL_B64(MachInst); - GPUStaticInst* decode_OP_SOP2__S_LSHR_B32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_LSHR_B64(MachInst); - GPUStaticInst* decode_OP_SOP2__S_MAX_I32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_MAX_U32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_MIN_I32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_MIN_U32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_MUL_I32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_NAND_B32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_NAND_B64(MachInst); - GPUStaticInst* decode_OP_SOP2__S_NOR_B32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_NOR_B64(MachInst); - GPUStaticInst* decode_OP_SOP2__S_ORN2_B32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_ORN2_B64(MachInst); - GPUStaticInst* decode_OP_SOP2__S_OR_B32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_OR_B64(MachInst); - GPUStaticInst* decode_OP_SOP2__S_RFE_RESTORE_B64(MachInst); - GPUStaticInst* decode_OP_SOP2__S_SUBB_U32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_SUB_I32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_SUB_U32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_XNOR_B32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_XNOR_B64(MachInst); - GPUStaticInst* decode_OP_SOP2__S_XOR_B32(MachInst); - GPUStaticInst* decode_OP_SOP2__S_XOR_B64(MachInst); - GPUStaticInst* decode_OP_SOPC__S_BITCMP0_B32(MachInst); - GPUStaticInst* decode_OP_SOPC__S_BITCMP0_B64(MachInst); - GPUStaticInst* decode_OP_SOPC__S_BITCMP1_B32(MachInst); - GPUStaticInst* decode_OP_SOPC__S_BITCMP1_B64(MachInst); - GPUStaticInst* decode_OP_SOPC__S_CMP_EQ_I32(MachInst); - GPUStaticInst* decode_OP_SOPC__S_CMP_EQ_U32(MachInst); - GPUStaticInst* decode_OP_SOPC__S_CMP_EQ_U64(MachInst); - GPUStaticInst* decode_OP_SOPC__S_CMP_GE_I32(MachInst); - GPUStaticInst* decode_OP_SOPC__S_CMP_GE_U32(MachInst); - GPUStaticInst* decode_OP_SOPC__S_CMP_GT_I32(MachInst); - GPUStaticInst* decode_OP_SOPC__S_CMP_GT_U32(MachInst); - GPUStaticInst* 
decode_OP_SOPC__S_CMP_LE_I32(MachInst); - GPUStaticInst* decode_OP_SOPC__S_CMP_LE_U32(MachInst); - GPUStaticInst* decode_OP_SOPC__S_CMP_LG_I32(MachInst); - GPUStaticInst* decode_OP_SOPC__S_CMP_LG_U32(MachInst); - GPUStaticInst* decode_OP_SOPC__S_CMP_LG_U64(MachInst); - GPUStaticInst* decode_OP_SOPC__S_CMP_LT_I32(MachInst); - GPUStaticInst* decode_OP_SOPC__S_CMP_LT_U32(MachInst); - GPUStaticInst* decode_OP_SOPC__S_SETVSKIP(MachInst); - GPUStaticInst* decode_OP_SOPC__S_SET_GPR_IDX_ON(MachInst); - GPUStaticInst* decode_OP_SOPK__S_ADDK_I32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_CBRANCH_I_FORK(MachInst); - GPUStaticInst* decode_OP_SOPK__S_CMOVK_I32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_CMPK_EQ_I32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_CMPK_EQ_U32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_CMPK_GE_I32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_CMPK_GE_U32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_CMPK_GT_I32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_CMPK_GT_U32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_CMPK_LE_I32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_CMPK_LE_U32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_CMPK_LG_I32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_CMPK_LG_U32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_CMPK_LT_I32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_CMPK_LT_U32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_GETREG_B32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_MOVK_I32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_MULK_I32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_SETREG_B32(MachInst); - GPUStaticInst* decode_OP_SOPK__S_SETREG_IMM32_B32(MachInst); - GPUStaticInst* decode_OP_SOPP__S_BARRIER(MachInst); - GPUStaticInst* decode_OP_SOPP__S_BRANCH(MachInst); - GPUStaticInst* decode_OP_SOPP__S_CBRANCH_CDBGSYS(MachInst); - GPUStaticInst* decode_OP_SOPP__S_CBRANCH_CDBGSYS_AND_USER(MachInst); - GPUStaticInst* decode_OP_SOPP__S_CBRANCH_CDBGSYS_OR_USER(MachInst); - GPUStaticInst* 
decode_OP_SOPP__S_CBRANCH_CDBGUSER(MachInst); - GPUStaticInst* decode_OP_SOPP__S_CBRANCH_EXECNZ(MachInst); - GPUStaticInst* decode_OP_SOPP__S_CBRANCH_EXECZ(MachInst); - GPUStaticInst* decode_OP_SOPP__S_CBRANCH_SCC0(MachInst); - GPUStaticInst* decode_OP_SOPP__S_CBRANCH_SCC1(MachInst); - GPUStaticInst* decode_OP_SOPP__S_CBRANCH_VCCNZ(MachInst); - GPUStaticInst* decode_OP_SOPP__S_CBRANCH_VCCZ(MachInst); - GPUStaticInst* decode_OP_SOPP__S_DECPERFLEVEL(MachInst); - GPUStaticInst* decode_OP_SOPP__S_ENDPGM(MachInst); - GPUStaticInst* decode_OP_SOPP__S_ENDPGM_SAVED(MachInst); - GPUStaticInst* decode_OP_SOPP__S_ICACHE_INV(MachInst); - GPUStaticInst* decode_OP_SOPP__S_INCPERFLEVEL(MachInst); - GPUStaticInst* decode_OP_SOPP__S_NOP(MachInst); - GPUStaticInst* decode_OP_SOPP__S_SENDMSG(MachInst); - GPUStaticInst* decode_OP_SOPP__S_SENDMSGHALT(MachInst); - GPUStaticInst* decode_OP_SOPP__S_SETHALT(MachInst); - GPUStaticInst* decode_OP_SOPP__S_SETKILL(MachInst); - GPUStaticInst* decode_OP_SOPP__S_SETPRIO(MachInst); - GPUStaticInst* decode_OP_SOPP__S_SET_GPR_IDX_MODE(MachInst); - GPUStaticInst* decode_OP_SOPP__S_SET_GPR_IDX_OFF(MachInst); - GPUStaticInst* decode_OP_SOPP__S_SLEEP(MachInst); - GPUStaticInst* decode_OP_SOPP__S_TRAP(MachInst); - GPUStaticInst* decode_OP_SOPP__S_TTRACEDATA(MachInst); - GPUStaticInst* decode_OP_SOPP__S_WAITCNT(MachInst); - GPUStaticInst* decode_OP_SOPP__S_WAKEUP(MachInst); - GPUStaticInst* decode_OP_VINTRP__V_INTERP_MOV_F32(MachInst); - GPUStaticInst* decode_OP_VINTRP__V_INTERP_P1_F32(MachInst); - GPUStaticInst* decode_OP_VINTRP__V_INTERP_P2_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_BFREV_B32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CEIL_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CEIL_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CEIL_F64(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CLREXCP(MachInst); - GPUStaticInst* decode_OP_VOP1__V_COS_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_COS_F32(MachInst); - GPUStaticInst* 
decode_OP_VOP1__V_CVT_F16_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_F16_I16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_F16_U16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_F32_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_F32_F64(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_F32_I32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_F32_U32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_F32_UBYTE0(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_F32_UBYTE1(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_F32_UBYTE2(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_F32_UBYTE3(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_F64_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_F64_I32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_F64_U32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_FLR_I32_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_I16_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_I32_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_I32_F64(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_OFF_F32_I4(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_RPI_I32_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_U16_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_U32_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_CVT_U32_F64(MachInst); - GPUStaticInst* decode_OP_VOP1__V_EXP_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_EXP_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_EXP_LEGACY_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_FFBH_I32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_FFBH_U32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_FFBL_B32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_FLOOR_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_FLOOR_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_FLOOR_F64(MachInst); - GPUStaticInst* decode_OP_VOP1__V_FRACT_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_FRACT_F32(MachInst); - GPUStaticInst* 
decode_OP_VOP1__V_FRACT_F64(MachInst); - GPUStaticInst* decode_OP_VOP1__V_FREXP_EXP_I16_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_FREXP_EXP_I32_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_FREXP_EXP_I32_F64(MachInst); - GPUStaticInst* decode_OP_VOP1__V_FREXP_MANT_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_FREXP_MANT_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_FREXP_MANT_F64(MachInst); - GPUStaticInst* decode_OP_VOP1__V_LOG_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_LOG_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_LOG_LEGACY_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_MOV_B32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_MOV_FED_B32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_NOP(MachInst); - GPUStaticInst* decode_OP_VOP1__V_NOT_B32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_RCP_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_RCP_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_RCP_F64(MachInst); - GPUStaticInst* decode_OP_VOP1__V_RCP_IFLAG_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_READFIRSTLANE_B32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_RNDNE_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_RNDNE_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_RNDNE_F64(MachInst); - GPUStaticInst* decode_OP_VOP1__V_RSQ_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_RSQ_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_RSQ_F64(MachInst); - GPUStaticInst* decode_OP_VOP1__V_SIN_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_SIN_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_SQRT_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_SQRT_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_SQRT_F64(MachInst); - GPUStaticInst* decode_OP_VOP1__V_TRUNC_F16(MachInst); - GPUStaticInst* decode_OP_VOP1__V_TRUNC_F32(MachInst); - GPUStaticInst* decode_OP_VOP1__V_TRUNC_F64(MachInst); - GPUStaticInst* decode_OP_VOP2__V_ADDC_U32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_ADD_F16(MachInst); - GPUStaticInst* 
decode_OP_VOP2__V_ADD_F32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_ADD_U16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_ADD_U32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_AND_B32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_ASHRREV_I16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_ASHRREV_I32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_CNDMASK_B32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_LDEXP_F16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_LSHLREV_B16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_LSHLREV_B32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_LSHRREV_B16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_LSHRREV_B32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MAC_F16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MAC_F32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MADAK_F16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MADAK_F32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MADMK_F16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MADMK_F32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MAX_F16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MAX_F32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MAX_I16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MAX_I32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MAX_U16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MAX_U32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MIN_F16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MIN_F32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MIN_I16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MIN_I32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MIN_U16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MIN_U32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MUL_F16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MUL_F32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MUL_HI_I32_I24(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MUL_HI_U32_U24(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MUL_I32_I24(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MUL_LEGACY_F32(MachInst); - 
GPUStaticInst* decode_OP_VOP2__V_MUL_LO_U16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_MUL_U32_U24(MachInst); - GPUStaticInst* decode_OP_VOP2__V_OR_B32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_SUBBREV_U32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_SUBB_U32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_SUBREV_F16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_SUBREV_F32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_SUBREV_U16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_SUBREV_U32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_SUB_F16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_SUB_F32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_SUB_U16(MachInst); - GPUStaticInst* decode_OP_VOP2__V_SUB_U32(MachInst); - GPUStaticInst* decode_OP_VOP2__V_XOR_B32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_CLASS_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_CLASS_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_CLASS_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_EQ_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_EQ_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_EQ_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_EQ_I16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_EQ_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_EQ_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_EQ_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_EQ_U32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_EQ_U64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_F_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_F_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_F_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_F_I16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_F_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_F_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_F_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_F_U32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_F_U64(MachInst); - 
GPUStaticInst* decode_OP_VOPC__V_CMPX_GE_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GE_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GE_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GE_I16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GE_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GE_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GE_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GE_U32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GE_U64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GT_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GT_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GT_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GT_I16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GT_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GT_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GT_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GT_U32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_GT_U64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LE_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LE_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LE_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LE_I16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LE_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LE_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LE_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LE_U32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LE_U64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LG_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LG_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LG_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LT_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LT_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LT_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LT_I16(MachInst); - GPUStaticInst* 
decode_OP_VOPC__V_CMPX_LT_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LT_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LT_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LT_U32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_LT_U64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NEQ_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NEQ_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NEQ_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NE_I16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NE_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NE_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NE_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NE_U32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NE_U64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NGE_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NGE_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NGE_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NGT_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NGT_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NGT_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NLE_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NLE_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NLE_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NLG_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NLG_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NLG_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NLT_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NLT_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_NLT_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_O_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_O_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_O_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_TRU_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_TRU_F32(MachInst); - GPUStaticInst* 
decode_OP_VOPC__V_CMPX_TRU_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_T_I16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_T_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_T_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_T_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_T_U32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_T_U64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_U_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_U_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMPX_U_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_CLASS_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_CLASS_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_CLASS_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_EQ_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_EQ_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_EQ_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_EQ_I16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_EQ_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_EQ_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_EQ_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_EQ_U32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_EQ_U64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_F_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_F_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_F_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_F_I16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_F_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_F_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_F_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_F_U32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_F_U64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GE_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GE_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GE_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GE_I16(MachInst); - GPUStaticInst* 
decode_OP_VOPC__V_CMP_GE_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GE_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GE_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GE_U32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GE_U64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GT_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GT_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GT_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GT_I16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GT_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GT_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GT_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GT_U32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_GT_U64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LE_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LE_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LE_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LE_I16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LE_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LE_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LE_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LE_U32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LE_U64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LG_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LG_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LG_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LT_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LT_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LT_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LT_I16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LT_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LT_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LT_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LT_U32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_LT_U64(MachInst); - GPUStaticInst* 
decode_OP_VOPC__V_CMP_NEQ_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NEQ_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NEQ_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NE_I16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NE_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NE_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NE_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NE_U32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NE_U64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NGE_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NGE_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NGE_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NGT_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NGT_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NGT_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NLE_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NLE_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NLE_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NLG_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NLG_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NLG_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NLT_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NLT_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_NLT_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_O_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_O_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_O_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_TRU_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_TRU_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_TRU_F64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_T_I16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_T_I32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_T_I64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_T_U16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_T_U32(MachInst); - 
GPUStaticInst* decode_OP_VOPC__V_CMP_T_U64(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_U_F16(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_U_F32(MachInst); - GPUStaticInst* decode_OP_VOPC__V_CMP_U_F64(MachInst); - GPUStaticInst* subDecode_OPU_VOP3(MachInst); - GPUStaticInst* subDecode_OP_DS(MachInst); - GPUStaticInst* subDecode_OP_FLAT(MachInst); - GPUStaticInst* subDecode_OP_MIMG(MachInst); - GPUStaticInst* subDecode_OP_MTBUF(MachInst); - GPUStaticInst* subDecode_OP_MUBUF(MachInst); - GPUStaticInst* subDecode_OP_SMEM(MachInst); - GPUStaticInst* subDecode_OP_SOP1(MachInst); - GPUStaticInst* subDecode_OP_SOPC(MachInst); - GPUStaticInst* subDecode_OP_SOPP(MachInst); - GPUStaticInst* subDecode_OP_VINTRP(MachInst); - GPUStaticInst* subDecode_OP_VOP1(MachInst); - GPUStaticInst* subDecode_OP_VOPC(MachInst); - GPUStaticInst* decode_invalid(MachInst); - }; - - struct InFmt_DS - { - unsigned int OFFSET0 : 8; - unsigned int OFFSET1 : 8; - unsigned int GDS : 1; - unsigned int OP : 8; - unsigned int pad_25 : 1; - unsigned int ENCODING : 6; - }; - - struct InFmt_DS_1 - { - unsigned int ADDR : 8; - unsigned int DATA0 : 8; - unsigned int DATA1 : 8; - unsigned int VDST : 8; - }; - - struct InFmt_EXP - { - unsigned int EN : 4; - unsigned int TGT : 6; - unsigned int COMPR : 1; - unsigned int DONE : 1; - unsigned int VM : 1; - unsigned int pad_13_25 : 13; - unsigned int ENCODING : 6; - }; - - struct InFmt_EXP_1 - { - unsigned int VSRC0 : 8; - unsigned int VSRC1 : 8; - unsigned int VSRC2 : 8; - unsigned int VSRC3 : 8; - }; - - struct InFmt_FLAT - { - unsigned int pad_0_15 : 16; - unsigned int GLC : 1; - unsigned int SLC : 1; - unsigned int OP : 7; - unsigned int pad_25 : 1; - unsigned int ENCODING : 6; - }; - - struct InFmt_FLAT_1 - { - unsigned int ADDR : 8; - unsigned int DATA : 8; - unsigned int pad_16_22 : 7; - unsigned int TFE : 1; - unsigned int VDST : 8; - }; - - struct InFmt_INST - { - unsigned int ENCODING : 32; - }; - - struct InFmt_MIMG - { - unsigned int pad_0_7 
: 8; - unsigned int DMASK : 4; - unsigned int UNORM : 1; - unsigned int GLC : 1; - unsigned int DA : 1; - unsigned int R128 : 1; - unsigned int TFE : 1; - unsigned int LWE : 1; - unsigned int OP : 7; - unsigned int SLC : 1; - unsigned int ENCODING : 6; - }; - - struct InFmt_MIMG_1 - { - unsigned int VADDR : 8; - unsigned int VDATA : 8; - unsigned int SRSRC : 5; - unsigned int SSAMP : 5; - unsigned int pad_26_30 : 5; - unsigned int D16 : 1; - }; - - struct InFmt_MTBUF - { - unsigned int OFFSET : 12; - unsigned int OFFEN : 1; - unsigned int IDXEN : 1; - unsigned int GLC : 1; - unsigned int OP : 4; - unsigned int DFMT : 4; - unsigned int NFMT : 3; - unsigned int ENCODING : 6; - }; - - struct InFmt_MTBUF_1 - { - unsigned int VADDR : 8; - unsigned int VDATA : 8; - unsigned int SRSRC : 5; - unsigned int pad_21 : 1; - unsigned int SLC : 1; - unsigned int TFE : 1; - unsigned int SOFFSET : 8; - }; - - struct InFmt_MUBUF - { - unsigned int OFFSET : 12; - unsigned int OFFEN : 1; - unsigned int IDXEN : 1; - unsigned int GLC : 1; - unsigned int pad_15 : 1; - unsigned int LDS : 1; - unsigned int SLC : 1; - unsigned int OP : 7; - unsigned int pad_25 : 1; - unsigned int ENCODING : 6; - }; - - struct InFmt_MUBUF_1 - { - unsigned int VADDR : 8; - unsigned int VDATA : 8; - unsigned int SRSRC : 5; - unsigned int pad_21_22 : 2; - unsigned int TFE : 1; - unsigned int SOFFSET : 8; - }; - - struct InFmt_SMEM - { - unsigned int SBASE : 6; - unsigned int SDATA : 7; - unsigned int pad_13_15 : 3; - unsigned int GLC : 1; - unsigned int IMM : 1; - unsigned int OP : 8; - unsigned int ENCODING : 6; - }; - - struct InFmt_SMEM_1 - { - unsigned int OFFSET : 20; - }; - - struct InFmt_SOP1 - { - unsigned int SSRC0 : 8; - unsigned int OP : 8; - unsigned int SDST : 7; - unsigned int ENCODING : 9; - }; - - struct InFmt_SOP2 - { - unsigned int SSRC0 : 8; - unsigned int SSRC1 : 8; - unsigned int SDST : 7; - unsigned int OP : 7; - unsigned int ENCODING : 2; - }; - - struct InFmt_SOPC - { - unsigned int 
SSRC0 : 8; - unsigned int SSRC1 : 8; - unsigned int OP : 7; - unsigned int ENCODING : 9; - }; - - struct InFmt_SOPK - { - unsigned int SIMM16 : 16; - unsigned int SDST : 7; - unsigned int OP : 5; - unsigned int ENCODING : 4; - }; - - struct InFmt_SOPP - { - unsigned int SIMM16 : 16; - unsigned int OP : 7; - unsigned int ENCODING : 9; - }; - - struct InFmt_VINTRP - { - unsigned int VSRC : 8; - unsigned int ATTRCHAN : 2; - unsigned int ATTR : 6; - unsigned int OP : 2; - unsigned int VDST : 8; - unsigned int ENCODING : 6; - }; - - struct InFmt_VOP1 - { - unsigned int SRC0 : 9; - unsigned int OP : 8; - unsigned int VDST : 8; - unsigned int ENCODING : 7; - }; - - struct InFmt_VOP2 - { - unsigned int SRC0 : 9; - unsigned int VSRC1 : 8; - unsigned int VDST : 8; - unsigned int OP : 6; - unsigned int ENCODING : 1; - }; - - struct InFmt_VOP3 - { - unsigned int VDST : 8; - unsigned int ABS : 3; - unsigned int pad_11_14 : 4; - unsigned int CLAMP : 1; - unsigned int OP : 10; - unsigned int ENCODING : 6; - }; - - struct InFmt_VOP3_1 - { - unsigned int SRC0 : 9; - unsigned int SRC1 : 9; - unsigned int SRC2 : 9; - unsigned int OMOD : 2; - unsigned int NEG : 3; - }; - - struct InFmt_VOP3_SDST_ENC - { - unsigned int VDST : 8; - unsigned int SDST : 7; - unsigned int CLAMP : 1; - unsigned int OP : 10; - unsigned int ENCODING : 6; - }; - - struct InFmt_VOPC - { - unsigned int SRC0 : 9; - unsigned int VSRC1 : 8; - unsigned int OP : 8; - unsigned int ENCODING : 7; - }; - - struct InFmt_VOP_DPP - { - unsigned int SRC0 : 8; - unsigned int DPP_CTRL : 9; - unsigned int pad_17_18 : 2; - unsigned int BOUND_CTRL : 1; - unsigned int SRC0_NEG : 1; - unsigned int SRC0_ABS : 1; - unsigned int SRC1_NEG : 1; - unsigned int SRC1_ABS : 1; - unsigned int BANK_MASK : 4; - unsigned int ROW_MASK : 4; - }; - - struct InFmt_VOP_SDWA - { - unsigned int SRC0 : 8; - unsigned int DST_SEL : 3; - unsigned int DST_UNUSED : 2; - unsigned int CLAMP : 1; - unsigned int pad_14_15 : 2; - unsigned int SRC0_SEL : 3; - 
unsigned int SRC0_SEXT : 1; - unsigned int SRC0_NEG : 1; - unsigned int SRC0_ABS : 1; - unsigned int pad_22_23 : 2; - unsigned int SRC1_SEL : 3; - unsigned int SRC1_SEXT : 1; - unsigned int SRC1_NEG : 1; - unsigned int SRC1_ABS : 1; - }; - - union InstFormat - { - InFmt_DS iFmt_DS; - InFmt_DS_1 iFmt_DS_1; - InFmt_EXP iFmt_EXP; - InFmt_EXP_1 iFmt_EXP_1; - InFmt_FLAT iFmt_FLAT; - InFmt_FLAT_1 iFmt_FLAT_1; - InFmt_INST iFmt_INST; - InFmt_MIMG iFmt_MIMG; - InFmt_MIMG_1 iFmt_MIMG_1; - InFmt_MTBUF iFmt_MTBUF; - InFmt_MTBUF_1 iFmt_MTBUF_1; - InFmt_MUBUF iFmt_MUBUF; - InFmt_MUBUF_1 iFmt_MUBUF_1; - InFmt_SMEM iFmt_SMEM; - InFmt_SMEM_1 iFmt_SMEM_1; - InFmt_SOP1 iFmt_SOP1; - InFmt_SOP2 iFmt_SOP2; - InFmt_SOPC iFmt_SOPC; - InFmt_SOPK iFmt_SOPK; - InFmt_SOPP iFmt_SOPP; - InFmt_VINTRP iFmt_VINTRP; - InFmt_VOP1 iFmt_VOP1; - InFmt_VOP2 iFmt_VOP2; - InFmt_VOP3 iFmt_VOP3; - InFmt_VOP3_1 iFmt_VOP3_1; - InFmt_VOP3_SDST_ENC iFmt_VOP3_SDST_ENC; - InFmt_VOPC iFmt_VOPC; - InFmt_VOP_DPP iFmt_VOP_DPP; - InFmt_VOP_SDWA iFmt_VOP_SDWA; - uint32_t imm_u32; - float imm_f32; - }; // union InstFormat -} // namespace Gcn3ISA -} // namespace gem5 - -#endif // __ARCH_GCN3_DECODER_HH__ diff --git a/src/arch/amdgpu/gcn3/gpu_isa.hh b/src/arch/amdgpu/gcn3/gpu_isa.hh deleted file mode 100644 index 4d5aba46c7..0000000000 --- a/src/arch/amdgpu/gcn3/gpu_isa.hh +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2016-2021 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. 
Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __ARCH_GCN3_GPU_ISA_HH__ -#define __ARCH_GCN3_GPU_ISA_HH__ - -#include -#include - -#include "arch/amdgpu/common/tlb.hh" -#include "arch/amdgpu/gcn3/gpu_registers.hh" -#include "gpu-compute/dispatcher.hh" -#include "gpu-compute/hsa_queue_entry.hh" -#include "gpu-compute/misc.hh" - -namespace gem5 -{ - -class Wavefront; - -namespace Gcn3ISA -{ - class GPUISA - { - public: - GPUISA(Wavefront &wf); - - template T - readConstVal(int opIdx) const - { - panic_if(!std::is_integral_v, "Constant values must " - "be an integer.\n"); - T val(0); - - if (isPosConstVal(opIdx)) { - val = (T)readPosConstReg(opIdx); - } - - if (isNegConstVal(opIdx)) { - val = (T)readNegConstReg(opIdx); - } - - return val; - } - - ScalarRegU32 readMiscReg(int opIdx) const; - void writeMiscReg(int opIdx, ScalarRegU32 operandVal); - bool hasScalarUnit() const { return true; } - void advancePC(GPUDynInstPtr gpuDynInst); - - private: - ScalarRegU32 readPosConstReg(int opIdx) const - { - return 
posConstRegs[opIdx - REG_INT_CONST_POS_MIN]; - } - - ScalarRegI32 readNegConstReg(int opIdx) const - { - return negConstRegs[opIdx - REG_INT_CONST_NEG_MIN]; - } - - static const std::array - posConstRegs; - static const std::array - negConstRegs; - - // parent wavefront - Wavefront &wavefront; - - // shader status bits - StatusReg statusReg; - // memory descriptor reg - ScalarRegU32 m0; - }; -} // namespace Gcn3ISA -} // namespace gem5 - -#endif // __ARCH_GCN3_GPU_ISA_HH__ diff --git a/src/arch/amdgpu/gcn3/gpu_mem_helpers.hh b/src/arch/amdgpu/gcn3/gpu_mem_helpers.hh deleted file mode 100644 index 05299e1a0d..0000000000 --- a/src/arch/amdgpu/gcn3/gpu_mem_helpers.hh +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright (c) 2021 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __ARCH_GCN3_GPU_MEM_HELPERS_HH__ -#define __ARCH_GCN3_GPU_MEM_HELPERS_HH__ - -#include "arch/amdgpu/gcn3/insts/gpu_static_inst.hh" -#include "arch/amdgpu/gcn3/insts/op_encodings.hh" -#include "debug/GPUMem.hh" -#include "gpu-compute/gpu_dyn_inst.hh" - -namespace gem5 -{ - -/** - * Helper function for instructions declared in op_encodings. This function - * takes in all of the arguments for a given memory request we are trying to - * initialize, then submits the request or requests depending on if the - * original request is aligned or unaligned. - */ -template -inline void -initMemReqHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type, - bool is_atomic=false) -{ - // local variables - int req_size = N * sizeof(T); - int block_size = gpuDynInst->computeUnit()->cacheLineSize(); - Addr vaddr = 0, split_addr = 0; - bool misaligned_acc = false; - RequestPtr req = nullptr, req1 = nullptr, req2 = nullptr; - PacketPtr pkt = nullptr, pkt1 = nullptr, pkt2 = nullptr; - - gpuDynInst->resetEntireStatusVector(); - for (int lane = 0; lane < Gcn3ISA::NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vaddr = gpuDynInst->addr[lane]; - - /** - * the base address of the cache line where the the last - * byte of the request will be stored. 
- */ - split_addr = roundDown(vaddr + req_size - 1, block_size); - - assert(split_addr <= vaddr || split_addr - vaddr < block_size); - /** - * if the base cache line address of the last byte is - * greater than the address of the first byte then we have - * a misaligned access. - */ - misaligned_acc = split_addr > vaddr; - - if (is_atomic) { - // make sure request is word aligned - assert((vaddr & 0x3) == 0); - - // a given lane's atomic can't cross cache lines - assert(!misaligned_acc); - - req = std::make_shared(vaddr, sizeof(T), 0, - gpuDynInst->computeUnit()->requestorId(), 0, - gpuDynInst->wfDynId, - gpuDynInst->makeAtomicOpFunctor( - &(reinterpret_cast(gpuDynInst->a_data))[lane], - &(reinterpret_cast(gpuDynInst->x_data))[lane])); - } else { - req = std::make_shared(vaddr, req_size, 0, - gpuDynInst->computeUnit()->requestorId(), 0, - gpuDynInst->wfDynId); - } - - if (misaligned_acc) { - gpuDynInst->setStatusVector(lane, 2); - req->splitOnVaddr(split_addr, req1, req2); - gpuDynInst->setRequestFlags(req1); - gpuDynInst->setRequestFlags(req2); - pkt1 = new Packet(req1, mem_req_type); - pkt2 = new Packet(req2, mem_req_type); - pkt1->dataStatic(&(reinterpret_cast( - gpuDynInst->d_data))[lane * N]); - pkt2->dataStatic(&(reinterpret_cast( - gpuDynInst->d_data))[lane * N + - req1->getSize()/sizeof(T)]); - DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index: %d unaligned memory " - "request for %#x\n", gpuDynInst->cu_id, - gpuDynInst->simdId, gpuDynInst->wfSlotId, lane, - split_addr); - gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt1); - gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt2); - } else { - gpuDynInst->setStatusVector(lane, 1); - gpuDynInst->setRequestFlags(req); - pkt = new Packet(req, mem_req_type); - pkt->dataStatic(&(reinterpret_cast( - gpuDynInst->d_data))[lane * N]); - gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt); - } - } else { // if lane is not active, then no pending requests - gpuDynInst->setStatusVector(lane, 0); - 
} - } -} - -/** - * Helper function for scalar instructions declared in op_encodings. This - * function takes in all of the arguments for a given memory request we are - * trying to initialize, then submits the request or requests depending on if - * the original request is aligned or unaligned. - */ -template -inline void -initMemReqScalarHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type) -{ - int req_size = N * sizeof(T); - int block_size = gpuDynInst->computeUnit()->cacheLineSize(); - Addr vaddr = gpuDynInst->scalarAddr; - - /** - * the base address of the cache line where the the last byte of - * the request will be stored. - */ - Addr split_addr = roundDown(vaddr + req_size - 1, block_size); - - assert(split_addr <= vaddr || split_addr - vaddr < block_size); - /** - * if the base cache line address of the last byte is greater - * than the address of the first byte then we have a misaligned - * access. - */ - bool misaligned_acc = split_addr > vaddr; - - RequestPtr req = std::make_shared(vaddr, req_size, 0, - gpuDynInst->computeUnit()->requestorId(), 0, - gpuDynInst->wfDynId); - - if (misaligned_acc) { - RequestPtr req1, req2; - req->splitOnVaddr(split_addr, req1, req2); - gpuDynInst->numScalarReqs = 2; - gpuDynInst->setRequestFlags(req1); - gpuDynInst->setRequestFlags(req2); - PacketPtr pkt1 = new Packet(req1, mem_req_type); - PacketPtr pkt2 = new Packet(req2, mem_req_type); - pkt1->dataStatic(gpuDynInst->scalar_data); - pkt2->dataStatic(gpuDynInst->scalar_data + req1->getSize()); - DPRINTF(GPUMem, "CU%d: WF[%d][%d]: unaligned scalar memory request for" - " %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId, - gpuDynInst->wfSlotId, split_addr); - gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt1); - gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt2); - } else { - gpuDynInst->numScalarReqs = 1; - gpuDynInst->setRequestFlags(req); - PacketPtr pkt = new Packet(req, mem_req_type); - pkt->dataStatic(gpuDynInst->scalar_data); - 
gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt); - } -} - -} // namespace gem5 - -#endif // __ARCH_GCN3_GPU_MEM_HELPERS_HH__ diff --git a/src/arch/amdgpu/gcn3/gpu_registers.hh b/src/arch/amdgpu/gcn3/gpu_registers.hh deleted file mode 100644 index 7f1307f372..0000000000 --- a/src/arch/amdgpu/gcn3/gpu_registers.hh +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __ARCH_GCN3_REGISTERS_HH__ -#define __ARCH_GCN3_REGISTERS_HH__ - -#include -#include -#include - -#include "arch/generic/vec_reg.hh" -#include "base/intmath.hh" -#include "base/logging.hh" - -namespace gem5 -{ - -namespace Gcn3ISA -{ - enum OpSelector : int - { - REG_SGPR_MIN = 0, - REG_SGPR_MAX = 101, - REG_FLAT_SCRATCH_LO = 102, - REG_FLAT_SCRATCH_HI = 103, - REG_XNACK_MASK_LO = 104, - REG_XNACK_MASK_HI = 105, - REG_VCC_LO = 106, - REG_VCC_HI = 107, - REG_TBA_LO = 108, - REG_TBA_HI = 109, - REG_TMA_LO = 110, - REG_TMA_HI = 111, - REG_TTMP_0 = 112, - REG_TTMP_1 = 113, - REG_TTMP_2 = 114, - REG_TTMP_3 = 115, - REG_TTMP_4 = 116, - REG_TTMP_5 = 117, - REG_TTMP_6 = 118, - REG_TTMP_7 = 119, - REG_TTMP_8 = 120, - REG_TTMP_9 = 121, - REG_TTMP_10 = 122, - REG_TTMP_11 = 123, - REG_M0 = 124, - REG_RESERVED_1 = 125, - REG_EXEC_LO = 126, - REG_EXEC_HI = 127, - REG_ZERO = 128, - REG_INT_CONST_POS_MIN = 129, - REG_INT_CONST_POS_MAX = 192, - REG_INT_CONST_NEG_MIN = 193, - REG_INT_CONST_NEG_MAX = 208, - REG_RESERVED_2 = 209, - REG_RESERVED_3 = 210, - REG_RESERVED_4 = 211, - REG_RESERVED_5 = 212, - REG_RESERVED_6 = 213, - REG_RESERVED_7 = 214, - REG_RESERVED_8 = 215, - REG_RESERVED_9 = 216, - REG_RESERVED_10 = 217, - REG_RESERVED_11 = 218, - REG_RESERVED_12 = 219, - REG_RESERVED_13 = 220, - REG_RESERVED_14 = 221, - REG_RESERVED_15 = 222, - REG_RESERVED_16 = 223, - REG_RESERVED_17 = 224, - REG_RESERVED_18 = 225, - REG_RESERVED_19 = 226, - REG_RESERVED_20 = 227, - REG_RESERVED_21 = 228, - REG_RESERVED_22 = 229, - REG_RESERVED_23 = 230, - REG_RESERVED_24 = 231, - REG_RESERVED_25 = 232, - REG_RESERVED_26 = 233, - REG_RESERVED_27 = 234, - REG_RESERVED_28 = 235, - REG_RESERVED_29 = 236, - REG_RESERVED_30 = 237, - REG_RESERVED_31 = 238, - REG_RESERVED_32 = 239, - REG_POS_HALF = 240, - REG_NEG_HALF = 241, - REG_POS_ONE = 242, - REG_NEG_ONE = 243, - REG_POS_TWO = 244, - REG_NEG_TWO = 245, - REG_POS_FOUR = 246, - REG_NEG_FOUR = 247, - REG_PI = 248, - /* NOTE: SDWA and SWDA 
both refer to sub d-word addressing */ - REG_SRC_SWDA = 249, - REG_SRC_DPP = 250, - REG_VCCZ = 251, - REG_EXECZ = 252, - REG_SCC = 253, - REG_LDS_DIRECT = 254, - REG_SRC_LITERAL = 255, - REG_VGPR_MIN = 256, - REG_VGPR_MAX = 511 - }; - - constexpr size_t MaxOperandDwords(16); - const int NumVecElemPerVecReg(64); - // op selector values 129 - 192 correspond to const values 1 - 64 - const int NumPosConstRegs = REG_INT_CONST_POS_MAX - - REG_INT_CONST_POS_MIN + 1; - // op selector values 193 - 208 correspond to const values -1 - 16 - const int NumNegConstRegs = REG_INT_CONST_NEG_MAX - - REG_INT_CONST_NEG_MIN + 1; - const int BITS_PER_BYTE = 8; - const int BITS_PER_WORD = 16; - const int MSB_PER_BYTE = (BITS_PER_BYTE - 1); - const int MSB_PER_WORD = (BITS_PER_WORD - 1); - - // typedefs for the various sizes/types of scalar regs - typedef uint8_t ScalarRegU8; - typedef int8_t ScalarRegI8; - typedef uint16_t ScalarRegU16; - typedef int16_t ScalarRegI16; - typedef uint32_t ScalarRegU32; - typedef int32_t ScalarRegI32; - typedef float ScalarRegF32; - typedef uint64_t ScalarRegU64; - typedef int64_t ScalarRegI64; - typedef double ScalarRegF64; - - // typedefs for the various sizes/types of vector reg elements - typedef uint8_t VecElemU8; - typedef int8_t VecElemI8; - typedef uint16_t VecElemU16; - typedef int16_t VecElemI16; - typedef uint32_t VecElemU32; - typedef int32_t VecElemI32; - typedef float VecElemF32; - typedef uint64_t VecElemU64; - typedef int64_t VecElemI64; - typedef double VecElemF64; - - const int DWordSize = sizeof(VecElemU32); - /** - * Size of a single-precision register in DWords. 
- */ - const int RegSizeDWords = sizeof(VecElemU32) / DWordSize; - - using VecRegContainerU32 = - VecRegContainer; - - struct StatusReg - { - StatusReg() : SCC(0), SPI_PRIO(0), USER_PRIO(0), PRIV(0), TRAP_EN(0), - TTRACE_EN(0), EXPORT_RDY(0), EXECZ(0), VCCZ(0), IN_TG(0), - IN_BARRIER(0), HALT(0), TRAP(0), TTRACE_CU_EN(0), VALID(0), - ECC_ERR(0), SKIP_EXPORT(0), PERF_EN(0), COND_DBG_USER(0), - COND_DBG_SYS(0), ALLOW_REPLAY(0), INSTRUCTION_ATC(0), RESERVED(0), - MUST_EXPORT(0), RESERVED_1(0) - { - } - - uint32_t SCC : 1; - uint32_t SPI_PRIO : 2; - uint32_t USER_PRIO : 2; - uint32_t PRIV : 1; - uint32_t TRAP_EN : 1; - uint32_t TTRACE_EN : 1; - uint32_t EXPORT_RDY : 1; - uint32_t EXECZ : 1; - uint32_t VCCZ : 1; - uint32_t IN_TG : 1; - uint32_t IN_BARRIER : 1; - uint32_t HALT : 1; - uint32_t TRAP : 1; - uint32_t TTRACE_CU_EN : 1; - uint32_t VALID : 1; - uint32_t ECC_ERR : 1; - uint32_t SKIP_EXPORT : 1; - uint32_t PERF_EN : 1; - uint32_t COND_DBG_USER : 1; - uint32_t COND_DBG_SYS : 1; - uint32_t ALLOW_REPLAY : 1; - uint32_t INSTRUCTION_ATC : 1; - uint32_t RESERVED : 3; - uint32_t MUST_EXPORT : 1; - uint32_t RESERVED_1 : 4; - }; - - std::string opSelectorToRegSym(int opIdx, int numRegs=0); - int opSelectorToRegIdx(int opIdx, int numScalarRegs); - bool isPosConstVal(int opIdx); - bool isNegConstVal(int opIdx); - bool isConstVal(int opIdx); - bool isLiteral(int opIdx); - bool isScalarReg(int opIdx); - bool isVectorReg(int opIdx); - bool isFlatScratchReg(int opIdx); - bool isExecMask(int opIdx); - bool isVccReg(int opIdx); -} // namespace Gcn3ISA -} // namespace gem5 - -#endif // __ARCH_GCN3_REGISTERS_HH__ diff --git a/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh b/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh deleted file mode 100644 index aa742f2e3a..0000000000 --- a/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __ARCH_GCN3_INSTS_GPU_STATIC_INST_HH__ -#define __ARCH_GCN3_INSTS_GPU_STATIC_INST_HH__ - -#include "arch/amdgpu/gcn3/gpu_registers.hh" -#include "arch/amdgpu/gcn3/operand.hh" -#include "gpu-compute/gpu_static_inst.hh" -#include "gpu-compute/scalar_register_file.hh" -#include "gpu-compute/vector_register_file.hh" -#include "gpu-compute/wavefront.hh" - -namespace gem5 -{ - -namespace Gcn3ISA -{ - class GCN3GPUStaticInst : public GPUStaticInst - { - public: - GCN3GPUStaticInst(const std::string &opcode); - ~GCN3GPUStaticInst(); - - void generateDisassembly() override { disassembly = _opcode; } - - bool - isFlatScratchRegister(int opIdx) override - { - return isFlatScratchReg(opIdx); - } - - bool - isExecMaskRegister(int opIdx) override - { - return isExecMask(opIdx); - } - - void initOperandInfo() override { return; } - int getOperandSize(int opIdx) override { return 0; } - - /** - * Return the number of tokens needed by the coalescer. In GCN3 there - * is generally one packet per memory request per lane generated. In - * HSAIL, the number of dest operands is used for loads and src - * operands for stores. This method should be overriden on a per-inst - * basis when this value differs. - */ - int coalescerTokenCount() const override { return 1; } - ScalarRegU32 srcLiteral() const override { return _srcLiteral; } - - protected: - void panicUnimplemented() const; - - /** - * if the instruction has a src literal - an immediate - * value that is part of the instruction stream - we - * store that here - */ - ScalarRegU32 _srcLiteral; - }; // class GCN3GPUStaticInst - -} // namespace Gcn3ISA -} // namespace gem5 - -#endif //__ARCH_GCN3_INSTS_GPU_STATIC_INST_HH__ diff --git a/src/arch/amdgpu/gcn3/insts/inst_util.hh b/src/arch/amdgpu/gcn3/insts/inst_util.hh deleted file mode 100644 index dfade6ad87..0000000000 --- a/src/arch/amdgpu/gcn3/insts/inst_util.hh +++ /dev/null @@ -1,896 +0,0 @@ -/* - * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. 
- * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __ARCH_GCN3_INSTS_INST_UTIL_HH__ -#define __ARCH_GCN3_INSTS_INST_UTIL_HH__ - -#include - -#include "arch/amdgpu/gcn3/gpu_registers.hh" - -namespace gem5 -{ - -// values for SDWA select operations -enum SDWASelVals : int -{ - SDWA_BYTE_0 = 0, /* select data[7:0] */ - SDWA_BYTE_1 = 1, /* select data[15:8] */ - SDWA_BYTE_2 = 2, /* select data[23:16] */ - SDWA_BYTE_3 = 3, /* select data[31:24] */ - SDWA_WORD_0 = 4, /* select data[15:0] */ - SDWA_WORD_1 = 5, /* select data[31:16] */ - SDWA_DWORD = 6 /* select data[31:0] */ -}; - -// values for format of destination bits for SDWA operations -enum SDWADstVals : int -{ - SDWA_UNUSED_PAD = 0, /* Pad all unused bits with 0 */ - SDWA_UNUSED_SEXT = 1, /* Sign-extend upper bits; pad lower bits w/ 0 */ - SDWA_UNUSED_PRESERVE = 2 /* select data[31:0] */ -}; - -// values for DPP operations -enum SqDPPVals : int -{ - SQ_DPP_QUAD_PERM_MAX = 0xFF, - SQ_DPP_RESERVED = 0x100, - SQ_DPP_ROW_SL1 = 0x101, - SQ_DPP_ROW_SL15 = 0x10F, - SQ_DPP_ROW_SR1 = 0x111, - SQ_DPP_ROW_SR15 = 0x11F, - SQ_DPP_ROW_RR1 = 0x121, - SQ_DPP_ROW_RR15 = 0x12F, - SQ_DPP_WF_SL1 = 0x130, - SQ_DPP_WF_RL1 = 0x134, - SQ_DPP_WF_SR1 = 0x138, - SQ_DPP_WF_RR1 = 0x13C, - SQ_DPP_ROW_MIRROR = 0x140, - SQ_DPP_ROW_HALF_MIRROR = 0x141, - SQ_DPP_ROW_BCAST15 = 0x142, - SQ_DPP_ROW_BCAST31 = 0x143 -}; -static const int ROW_SIZE = 16; /* 16 registers per row */ -static const int NUM_BANKS = 4; /* 64 registers, 16/bank */ - -namespace Gcn3ISA -{ - template - inline T - wholeQuadMode(T val) - { - T wqm = 0; - T mask = 0xF; - - for (T bits = val; mask != 0; mask <<= 4) - if ((bits & mask) != 0) - wqm |= mask; - - return wqm; - } - - template - inline T - quadMask(T val) - { - T qmsk = 0; - T mask = 0xF; - T qbit = 0x1; - - for (T bits = val; mask != 0; mask <<= 4, qbit <<= 1) { - if (bits & mask) { - qmsk |= qbit; - } - } - - return qmsk; - } - - template - inline ScalarRegI32 - countZeroBits(T val) - { - ScalarRegI32 num_zeros - = std::numeric_limits::digits - 
popCount(val); - - return num_zeros; - } - - template - inline ScalarRegI32 - findFirstZero(T val) - { - if (val == ~T(0)) { - return -1; - } - - return findLsbSet(~val); - } - - template - inline ScalarRegI32 - findFirstOne(T val) - { - if (!val) { - return -1; - } - - return findLsbSet(val); - } - - template - inline ScalarRegI32 - findFirstOneMsb(T val) - { - if (!val) { - return -1; - } - - return findMsbSet(val); - } - - template - inline ScalarRegI32 - countZeroBitsMsb(T val) - { - if (!val) { - return -1; - } - - return std::numeric_limits::digits - 1 - findMsbSet(val); - } - - inline ScalarRegI32 - firstOppositeSignBit(ScalarRegI32 val) - { - bool found(false); - bool sign_bit = (val & 0x80000000) != 0; - ScalarRegU32 tmp_val(0); - int count(0); - - if (!val || val == -1) { - return -1; - } - - for (int i = 0; i < std::numeric_limits::digits; ++i) { - tmp_val = val & (0x80000000 >> i); - - if (!sign_bit) { - if (tmp_val) { - found = true; - break; - } - } else { - if (!tmp_val) { - found = true; - break; - } - } - ++count; - } - - if (found) { - return count; - } else { - return -1; - } - } - - inline ScalarRegI32 - firstOppositeSignBit(ScalarRegI64 val) - { - bool found(false); - bool sign_bit = (val & 0x8000000000000000ULL) != 0; - ScalarRegU64 tmp_val(0); - int count(0); - - if (!val || val == -1) { - return -1; - } - - for (int i = 0; i < std::numeric_limits::digits; ++i) { - tmp_val = val & (0x8000000000000000ULL >> i); - - if (!sign_bit) { - if (tmp_val) { - found = true; - break; - } - } else { - if (!tmp_val) { - found = true; - break; - } - } - ++count; - } - - if (found) { - return count; - } else { - return -1; - } - } - - template - inline T - median(T val_0, T val_1, T val_2) - { - if (std::is_floating_point_v) { - return std::fmax(std::fmin(val_0, val_1), - std::fmin(std::fmax(val_0, val_1), val_2)); - } else { - return std::max(std::min(val_0, val_1), - std::min(std::max(val_0, val_1), val_2)); - } - } - - template - inline T 
roundNearestEven(T val) - { - T int_part = 0; - T nearest_round = std::floor(val + 0.5); - if ((int)std::floor(val) % 2 == 0 - && std::modf(std::abs(val), &int_part) == 0.5) { - nearest_round = nearest_round - 1; - } - - return nearest_round; - } - - inline VecElemU32 - muladd(VecElemU64 &dst, VecElemU32 val_0, VecElemU32 val_1, - VecElemU64 val_2) - { - __uint128_t u0 = (__uint128_t)val_0; - __uint128_t u1 = (__uint128_t)val_1; - __uint128_t u2 = (__uint128_t)val_2; - __uint128_t result = u0 * u1 + u2; - - dst = (VecElemU64)result; - - return (VecElemU32)(result >> 64) ? 1 : 0; - } - - inline VecElemU32 - muladd(VecElemI64 &dst, VecElemI32 val_0, VecElemI32 val_1, - VecElemI64 val_2) - { - __int128_t u0 = (__int128_t)val_0; - __int128_t u1 = (__int128_t)val_1; - __int128_t u2 = (__int128_t)val_2; - __int128_t result = u0 * u1 + u2; - - dst = (VecElemI64)result; - - return (VecElemU32)(result >> 64) ? 1 : 0; - } - - /** - * dppInstImpl is a helper function that performs the inputted operation - * on the inputted vector register lane. The returned output lane - * represents the input lane given the destination lane and DPP_CTRL word. 
- * - * Currently the values are: - * 0x0 - 0xFF: full permute of four threads - * 0x100: reserved - * 0x101 - 0x10F: row shift right by 1-15 threads - * 0x111 - 0x11F: row shift right by 1-15 threads - * 0x121 - 0x12F: row shift right by 1-15 threads - * 0x130: wavefront left shift by 1 thread - * 0x134: wavefront left rotate by 1 thread - * 0x138: wavefront right shift by 1 thread - * 0x13C: wavefront right rotate by 1 thread - * 0x140: mirror threads within row - * 0x141: mirror threads within 1/2 row (8 threads) - * 0x142: broadcast 15th thread of each row to next row - * 0x143: broadcast thread 31 to rows 2 and 3 - */ - int dppInstImpl(SqDPPVals dppCtrl, int currLane, int rowNum, - int rowOffset, bool & outOfBounds) - { - // local variables - // newLane will be the same as the input lane unless swizzling happens - int newLane = currLane; - // for shift/rotate permutations; positive values are LEFT rotates - int count = 1; - int localRowOffset = rowOffset; - int localRowNum = rowNum; - - if (dppCtrl <= SQ_DPP_QUAD_PERM_MAX) { // DPP_QUAD_PERM{00:FF} - int quadBase = (currLane & ~(3)); - int quadPix = (currLane & 3); - quadPix = ((dppCtrl >> (2 * quadPix)) & 3); - newLane = (quadBase | quadPix); - } else if (dppCtrl == SQ_DPP_RESERVED) { - panic("ERROR: instruction using reserved DPP_CTRL value\n"); - } else if ((dppCtrl >= SQ_DPP_ROW_SL1) && - (dppCtrl <= SQ_DPP_ROW_SL15)) { // DPP_ROW_SL{1:15} - count -= (dppCtrl - SQ_DPP_ROW_SL1 + 1); - if ((localRowOffset + count >= 0) && - (localRowOffset + count < ROW_SIZE)) { - localRowOffset += count; - newLane = (rowNum | localRowOffset); - } else { - outOfBounds = true; - } - } else if ((dppCtrl >= SQ_DPP_ROW_SR1) && - (dppCtrl <= SQ_DPP_ROW_SR15)) { // DPP_ROW_SR{1:15} - count -= (dppCtrl - SQ_DPP_ROW_SR1 + 1); - if ((localRowOffset + count >= 0) && - (localRowOffset + count < ROW_SIZE)) { - localRowOffset += count; - newLane = (rowNum | localRowOffset); - } else { - outOfBounds = true; - } - } else if ((dppCtrl >= 
SQ_DPP_ROW_RR1) && - (dppCtrl <= SQ_DPP_ROW_RR15)) { // DPP_ROW_RR{1:15} - count -= (dppCtrl - SQ_DPP_ROW_RR1 + 1); - localRowOffset = (localRowOffset + count + ROW_SIZE) % ROW_SIZE; - newLane = (rowNum | localRowOffset); - } else if (dppCtrl == SQ_DPP_WF_SL1) { // DPP_WF_SL1 - count = 1; - if ((currLane >= 0) && (currLane < NumVecElemPerVecReg)) { - newLane += count; - } else { - outOfBounds = true; - } - } else if (dppCtrl == SQ_DPP_WF_RL1) { // DPP_WF_RL1 - count = 1; - newLane = (currLane + count + NumVecElemPerVecReg) % - NumVecElemPerVecReg; - } else if (dppCtrl == SQ_DPP_WF_SR1) { // DPP_WF_SR1 - count = -1; - int currVal = (currLane + count); - if ((currVal >= 0) && (currVal < NumVecElemPerVecReg)) { - newLane += count; - } else { - outOfBounds = true; - } - } else if (dppCtrl == SQ_DPP_WF_RR1) { // DPP_WF_RR1 - count = -1; - newLane = (currLane + count + NumVecElemPerVecReg) % - NumVecElemPerVecReg; - } else if (dppCtrl == SQ_DPP_ROW_MIRROR) { // DPP_ROW_MIRROR - localRowOffset = (15 - localRowOffset); - newLane = (rowNum | localRowOffset); - } else if (dppCtrl == SQ_DPP_ROW_HALF_MIRROR) { // DPP_ROW_HALF_MIRROR - localRowNum = (currLane & -0x7); - localRowOffset = (currLane & 0x7); - localRowOffset = (7 - localRowNum); - newLane = (localRowNum | localRowOffset); - } else if (dppCtrl == SQ_DPP_ROW_BCAST15) { // DPP_ROW_BCAST15 - count = 15; - if (currLane > count) { - newLane = (currLane & ~count) - 1; - } - } else if (dppCtrl == SQ_DPP_ROW_BCAST31) { // DPP_ROW_BCAST31 - count = 31; - if (currLane > count) { - newLane = (currLane & ~count) - 1; - } - } else { - panic("Unimplemented DPP control operation: %d\n", dppCtrl); - } - - return newLane; - } - - /** - * processDPP is a helper function for implementing Data Parallel Primitive - * instructions. This function may be called by many different VOP1 - * instructions to do operations within a register. 
- */ - template - void processDPP(GPUDynInstPtr gpuDynInst, InFmt_VOP_DPP dppInst, - T & src0) - { - // local variables - SqDPPVals dppCtrl = (SqDPPVals)dppInst.DPP_CTRL; - int boundCtrl = dppInst.BOUND_CTRL; - int bankMask = dppInst.BANK_MASK; - int rowMask = dppInst.ROW_MASK; - // row, bank info to be calculated per lane - int rowNum = 0, bankNum = 0, rowOffset = 0; - // outLane will be the same as the input lane unless swizzling happens - int outLane = 0; - bool laneDisabled = false; - // flags used for determining if a lane should be written to/reset/etc. - bool outOfBounds = false, zeroSrc = false; - long long threadValid = 0; - - /** - * STEP 1a: check if the absolute value (ABS) or negation (NEG) tags - * are set. If so, do the appropriate action(s) on src0 and/or src1. - * - * NOTE: ABS takes priority over NEG. - */ - if (dppInst.SRC0_NEG) { - src0.negModifier(); - } - - if (dppInst.SRC0_ABS) { - src0.absModifier(); - } - - // iterate over all register lanes, performing steps 2-4 - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - threadValid = (0x1LL << lane); - /** - * STEP 2: check the row and bank mask values. These determine - * which threads are enabled for the subsequent DPP_CTRL - * operations. 
- */ - rowNum = (lane / ROW_SIZE); - rowOffset = (lane % ROW_SIZE); - bankNum = (rowOffset / NUM_BANKS); - - if (((rowMask & (0x1 << rowNum)) == 0) /* row mask */ || - ((bankMask & (0x1 << bankNum)) == 0) /* bank mask */) { - laneDisabled = true; - continue; - } - - /** - * STEP 4: Handle the potential values of DPP_CTRL: - * 0x0 - 0xFF: full permute of four threads - * 0x100: reserved - * 0x101 - 0x10F: row shift right by 1-15 threads - * 0x111 - 0x11F: row shift right by 1-15 threads - * 0x121 - 0x12F: row shift right by 1-15 threads - * 0x130: wavefront left shift by 1 thread - * 0x134: wavefront left rotate by 1 thread - * 0x138: wavefront right shift by 1 thread - * 0x13C: wavefront right rotate by 1 thread - * 0x140: mirror threads within row - * 0x141: mirror threads within 1/2 row (8 threads) - * 0x142: broadcast 15th thread of each row to next row - * 0x143: broadcast thread 31 to rows 2 and 3 - */ - if (!laneDisabled) { - outLane = dppInstImpl(dppCtrl, lane, rowNum, rowOffset, - outOfBounds); - } - - /** - * STEP 4: Implement bound control for disabled threads. If thread - * is disabled but boundCtrl is set, then we need to set the source - * data to 0 (i.e., set this lane to 0). - */ - if (laneDisabled) { - threadValid = 0; - } else if (outOfBounds) { - if (boundCtrl == 1) { - zeroSrc = true; - } else { - threadValid = 0; - } - } else if (!gpuDynInst->exec_mask[lane]) { - if (boundCtrl == 1) { - zeroSrc = true; - } else { - threadValid = 0; - } - } - - if (threadValid != 0 && !outOfBounds && !zeroSrc) { - assert(!laneDisabled); - src0[outLane] = src0[lane]; - } else if (zeroSrc) { - src0[lane] = 0; - } - - // reset for next iteration - laneDisabled = false; - } - } - - /** - * processDPP is a helper function for implementing Data Parallel Primitive - * instructions. This function may be called by many different - * VOP2/VOPC instructions to do operations within a register. 
- */ - template - void processDPP(GPUDynInstPtr gpuDynInst, InFmt_VOP_DPP dppInst, - T & src0, T & src1) - { - /** - * STEP 1b: check if the absolute value (ABS) or negation (NEG) tags - * are set. If so, do the appropriate action(s) on src0 and/or src1. - * - * NOTE: ABS takes priority over NEG. - */ - if (dppInst.SRC1_NEG) { - src1.negModifier(); - } - - if (dppInst.SRC1_ABS) { - src1.absModifier(); - } - - // Since only difference for VOP1 and VOP2/VOPC instructions is SRC1, - // which is only used for negation/absolute value, call other version - // to do everything else. - processDPP(gpuDynInst, dppInst, src0); - } - - /** - * sdwaInstSrcImpl_helper contains the per-lane code for selecting the - * appropriate bytes/words of the lane and doing the appropriate - * masking/padding/sign extending. It returns the value after these - * operations are done on it. - */ - template - T sdwaInstSrcImpl_helper(T currOperVal, const T origOperVal, - const SDWASelVals sel, const bool signExt) - { - // local variables - int low_bit = 0, high_bit = 0; - bool signExt_local = signExt; - T retVal = 0; - - // if we're preserving all of the bits, then we can immediately return - if (sel == SDWA_DWORD) { - return currOperVal; - } - - if (sel < SDWA_WORD_0) { // we are selecting 1 byte - /* - Process byte 0 first. This code eiter selects the original bits - of byte 0, or makes the bits of the selected byte be byte 0 (and - next either sign extends or zero's out upper bits). 
- */ - low_bit = (sel * Gcn3ISA::BITS_PER_BYTE); - high_bit = low_bit + Gcn3ISA::MSB_PER_BYTE; - retVal = bits(currOperVal, high_bit, low_bit); - - // make sure update propagated, since used next - fatal_if(bits(retVal, Gcn3ISA::MSB_PER_BYTE) != - bits(origOperVal, high_bit), - "ERROR: SDWA byte update not propagated: retVal: %d, " - "orig: %d\n", bits(retVal, Gcn3ISA::MSB_PER_BYTE), - bits(origOperVal, high_bit)); - // sign extended value depends on upper-most bit of the new byte 0 - signExt_local = (signExt && - (bits(retVal, Gcn3ISA::MSB_PER_BYTE, 0) & 0x80)); - - // process all other bytes -- if sign extending, make them 1, else - // all 0's so leave as is - if (signExt_local) { - retVal = (uint32_t)sext(retVal); - } - } else if (sel < SDWA_DWORD) { // we are selecting 1 word - /* - Process word 0 first. This code eiter selects the original bits - of word 0, or makes the bits of the selected word be word 0 (and - next either sign extends or zero's out upper bits). - */ - low_bit = (sel & 1) * Gcn3ISA::BITS_PER_WORD; - high_bit = low_bit + Gcn3ISA::MSB_PER_WORD; - retVal = bits(currOperVal, high_bit, low_bit); - - // make sure update propagated, since used next - fatal_if(bits(retVal, Gcn3ISA::MSB_PER_WORD) != - bits(origOperVal, high_bit), - "ERROR: SDWA word update not propagated: retVal: %d, " - "orig: %d\n", - bits(retVal, Gcn3ISA::MSB_PER_WORD), - bits(origOperVal, high_bit)); - // sign extended value depends on upper-most bit of the new word 0 - signExt_local = (signExt && - (bits(retVal, Gcn3ISA::MSB_PER_WORD, 0) & - 0x8000)); - - // process other word -- if sign extending, make them 1, else all - // 0's so leave as is - if (signExt_local) { - retVal = (uint32_t)sext(retVal); - } - } else { - assert(sel != SDWA_DWORD); // should have returned earlier - panic("Unimplemented SDWA select operation: %d\n", sel); - } - - return retVal; - } - - - /** - * sdwaInstSrcImpl is a helper function that selects the appropriate - * bits/bytes for each lane of the 
inputted source operand of an SDWA - * instruction, does the appropriate masking/padding/sign extending for the - * non-selected bits/bytes, and updates the operands values with the - * resultant value. - * - * The desired behavior is: - * 1. Select the appropriate bits/bytes based on sel: - * 0 (SDWA_BYTE_0): select data[7:0] - * 1 (SDWA_BYTE_1): select data[15:8] - * 2 (SDWA_BYTE_2): select data[23:16] - * 3 (SDWA_BYTE_3): select data[31:24] - * 4 (SDWA_WORD_0): select data[15:0] - * 5 (SDWA_WORD_1): select data[31:16] - * 6 (SDWA_DWORD): select data[31:0] - * 2. if sign extend is set, then sign extend the value - */ - template - void sdwaInstSrcImpl(T & currOper, T & origCurrOper, - const SDWASelVals sel, const bool signExt) - { - // iterate over all lanes, setting appropriate, selected value - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - currOper[lane] = sdwaInstSrcImpl_helper(currOper[lane], - origCurrOper[lane], sel, - signExt); - } - } - - - /** - * sdwaInstDstImpl_helper contains the per-lane code for selecting the - * appropriate bytes/words of the lane and doing the appropriate - * masking/padding/sign extending. It returns the value after these - * operations are done on it. 
- */ - template - T sdwaInstDstImpl_helper(T currDstVal, const T origDstVal, - const bool clamp, const SDWASelVals sel, - const SDWADstVals unusedBits_format) - { - // local variables - int low_bit = 0, high_bit = 0; - bool signExt = (unusedBits_format == SDWA_UNUSED_SEXT); - //bool pad = (unusedBits_format == SDWA_UNUSED_PAD); - bool preserve = (unusedBits_format == SDWA_UNUSED_PRESERVE); - T retVal = 0, origBits_thisByte = 0, currBits_thisByte = 0, - origBits_thisWord = 0, currBits_thisWord = 0, newBits = 0; - - // if we're preserving all of the bits, then we can immediately return - if (unusedBits_format == SDWA_UNUSED_PRESERVE) { - assert(sel == SDWA_DWORD); - return currDstVal; - } else if (sel == SDWA_DWORD) { - // NOTE: users may set the unused bits variable to anything in this - // scenario, because it will be ignored - return currDstVal; - } - - if (sel < SDWA_WORD_0) { // we are selecting 1 byte - // if we sign extended depends on upper-most bit of byte 0 - signExt = (signExt && - (bits(currDstVal, Gcn3ISA::MSB_PER_WORD, 0) & 0x80)); - - for (int byte = 0; byte < 4; ++byte) { - low_bit = byte * Gcn3ISA::BITS_PER_BYTE; - high_bit = low_bit + Gcn3ISA::MSB_PER_BYTE; - /* - Options: - 1. byte == sel: we are keeping all bits in this byte - 2. preserve is set: keep this byte as is because the - output preserve flag is set - 3. byte > sel && signExt: we're sign extending and - this byte is one of the bytes we need to sign extend - */ - origBits_thisByte = bits(origDstVal, high_bit, low_bit); - currBits_thisByte = bits(currDstVal, high_bit, low_bit); - newBits = ((byte == sel) ? origBits_thisByte : - ((preserve) ? currBits_thisByte : - (((byte > sel) && signExt) ? 
0xff : 0))); - retVal = insertBits(retVal, high_bit, low_bit, newBits); - } - } else if (sel < SDWA_DWORD) { // we are selecting 1 word - low_bit = 0; - high_bit = low_bit + Gcn3ISA::MSB_PER_WORD; - // if we sign extended depends on upper-most bit of word 0 - signExt = (signExt && - (bits(currDstVal, high_bit, low_bit) & 0x8000)); - - for (int word = 0; word < 2; ++word) { - low_bit = word * Gcn3ISA::BITS_PER_WORD; - high_bit = low_bit + Gcn3ISA::MSB_PER_WORD; - /* - Options: - 1. word == sel & 1: we are keeping all bits in this word - 2. preserve is set: keep this word as is because the - output preserve flag is set - 3. word > (sel & 1) && signExt: we're sign extending and - this word is one of the words we need to sign extend - */ - origBits_thisWord = bits(origDstVal, high_bit, low_bit); - currBits_thisWord = bits(currDstVal, high_bit, low_bit); - newBits = ((word == (sel & 0x1)) ? origBits_thisWord : - ((preserve) ? currBits_thisWord : - (((word > (sel & 0x1)) && signExt) ? 0xffff : 0))); - retVal = insertBits(retVal, high_bit, low_bit, newBits); - } - } else { - assert(sel != SDWA_DWORD); // should have returned earlier - panic("Unimplemented SDWA select operation: %d\n", sel); - } - - return retVal; - } - - - /** - * sdwaInstDestImpl is a helper function that selects the appropriate - * bits/bytes for the inputted dest operand of an SDWA instruction, does - * the appropriate masking/padding/sign extending for the non-selected - * bits/bytes, and updates the operands values with the resultant value. - * - * The desired behavior is: - * 1. Select the appropriate bits/bytes based on sel: - * 0 (SDWA_BYTE_0): select data[7:0] - * 1 (SDWA_BYTE_1): select data[15:8] - * 2 (SDWA_BYTE_2): select data[23:16] - * 3 (SDWA_BYTE_3): select data[31:24] - * 4 (SDWA_WORD_0): select data[15:0] - * 5 (SDWA_WORD_1): select data[31:16] - * 6 (SDWA_DWORD): select data[31:0] - * 2. 
either pad, sign extend, or select all bits based on the value of - * unusedBits_format: - * 0 (SDWA_UNUSED_PAD): pad all unused bits with 0 - * 1 (SDWA_UNUSED_SEXT): sign-extend upper bits; pad lower bits w/ 0 - * 2 (SDWA_UNUSED_PRESERVE): select data[31:0] - */ - template - void sdwaInstDstImpl(T & dstOper, T & origDstOper, const bool clamp, - const SDWASelVals sel, - const SDWADstVals unusedBits_format) - { - // iterate over all lanes, setting appropriate, selected value - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - dstOper[lane] = sdwaInstDstImpl_helper(dstOper[lane], - origDstOper[lane], clamp, - sel, unusedBits_format); - } - } - - - /** - * processSDWA_srcHelper is a helper function for implementing sub d-word - * addressing instructions for the src operands. This function may be - * called by many different VOP1/VOP2/VOPC instructions to do operations - * within a register. This function is also agnostic of which operand it - * is operating on, so that it can be called for any src operand. - */ - template - void processSDWA_src_helper(T & currSrc, T & origCurrSrc, - const SDWASelVals src_sel, - const bool src_signExt, const bool src_abs, - const bool src_neg) - { - /** - * STEP 1: check if the absolute value (ABS) or negation (NEG) tags - * are set. If so, do the appropriate action(s) on the src operand. - * - * NOTE: According to the CSim implementation, ABS takes priority over - * NEG. - */ - if (src_neg) { - currSrc.negModifier(); - } - - if (src_abs) { - currSrc.absModifier(); - } - - /** - * STEP 2: select the appropriate bits for each lane of source operand. - */ - sdwaInstSrcImpl(currSrc, origCurrSrc, src_sel, src_signExt); - } - - - /** - * processSDWA_src is a helper function for implementing sub d-word - * addressing instructions for the src operands. This function may be - * called by many different VOP1 instructions to do operations within a - * register. 
processSDWA_dst is called after the math, while - * processSDWA_src is called before the math. - */ - template - void processSDWA_src(InFmt_VOP_SDWA sdwaInst, T & src0, T & origSrc0) - { - // local variables - const SDWASelVals src0_sel = (SDWASelVals)sdwaInst.SRC0_SEL; - const bool src0_signExt = sdwaInst.SRC0_SEXT; - const bool src0_neg = sdwaInst.SRC0_NEG; - const bool src0_abs = sdwaInst.SRC0_ABS; - - // NOTE: difference between VOP1 and VOP2/VOPC is that there is no src1 - // operand. So ensure that SRC1 fields are not set, then call helper - // function only on src0. - assert(!sdwaInst.SRC1_SEXT); - assert(!sdwaInst.SRC1_NEG); - assert(!sdwaInst.SRC1_ABS); - - processSDWA_src_helper(src0, origSrc0, src0_sel, src0_signExt, - src0_abs, src0_neg); - } - - - /** - * processSDWA_src is a helper function for implementing sub d-word - * addressing instructions. This function may be called by many different - * VOP2/VOPC instructions to do operations within a register. - * processSDWA_dst is called after the math, while processSDWA_src is - * called before the math. - */ - template - void processSDWA_src(InFmt_VOP_SDWA sdwaInst, T & src0, T & origSrc0, - T & src1, T & origSrc1) - { - // local variables - const SDWASelVals src0_sel = (SDWASelVals)sdwaInst.SRC0_SEL; - const bool src0_signExt = sdwaInst.SRC0_SEXT; - const bool src0_neg = sdwaInst.SRC0_NEG; - const bool src0_abs = sdwaInst.SRC0_ABS; - const SDWASelVals src1_sel = (SDWASelVals)sdwaInst.SRC1_SEL; - const bool src1_signExt = sdwaInst.SRC1_SEXT; - const bool src1_neg = sdwaInst.SRC1_NEG; - const bool src1_abs = sdwaInst.SRC1_ABS; - - processSDWA_src_helper(src0, origSrc0, src0_sel, src0_signExt, - src0_abs, src0_neg); - processSDWA_src_helper(src1, origSrc1, src1_sel, src1_signExt, - src1_abs, src1_neg); - } - - - /** - * processSDWA_dst is a helper function for implementing sub d-word - * addressing instructions for the dst operand. 
This function may be - * called by many different VOP1/VOP2/VOPC instructions to do operations - * within a register. processSDWA_dst is called after the math, while - * processSDWA_src is called before the math. - */ - template - void processSDWA_dst(InFmt_VOP_SDWA sdwaInst, T & dst, T & origDst) - { - // local variables - const SDWADstVals dst_unusedBits_format = - (SDWADstVals)sdwaInst.DST_UNUSED; - const SDWASelVals dst_sel = (SDWASelVals)sdwaInst.DST_SEL; - const bool clamp = sdwaInst.CLAMP; - - /** - * STEP 1: select the appropriate bits for dst and pad/sign-extend as - * appropriate. - */ - sdwaInstDstImpl(dst, origDst, clamp, dst_sel, dst_unusedBits_format); - } -} // namespace Gcn3ISA -} // namespace gem5 - -#endif // __ARCH_GCN3_INSTS_INST_UTIL_HH__ diff --git a/src/arch/amdgpu/gcn3/insts/instructions.cc b/src/arch/amdgpu/gcn3/insts/instructions.cc deleted file mode 100644 index b9d29a2204..0000000000 --- a/src/arch/amdgpu/gcn3/insts/instructions.cc +++ /dev/null @@ -1,41675 +0,0 @@ -/* - * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "arch/amdgpu/gcn3/insts/instructions.hh" - -#include - -#include "arch/amdgpu/gcn3/insts/inst_util.hh" -#include "debug/GCN3.hh" -#include "debug/GPUSync.hh" -#include "gpu-compute/shader.hh" - -namespace gem5 -{ - -namespace Gcn3ISA -{ - - Inst_SOP2__S_ADD_U32::Inst_SOP2__S_ADD_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_add_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_ADD_U32 - - Inst_SOP2__S_ADD_U32::~Inst_SOP2__S_ADD_U32() - { - } // ~Inst_SOP2__S_ADD_U32 - - // D.u = S0.u + S1.u; - // SCC = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an unsigned - // overflow/carry-out. - void - Inst_SOP2__S_ADD_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() + src1.rawData(); - scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData()) - >= 0x100000000ULL ? 
1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP2__S_SUB_U32::Inst_SOP2__S_SUB_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_sub_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_SUB_U32 - - Inst_SOP2__S_SUB_U32::~Inst_SOP2__S_SUB_U32() - { - } // ~Inst_SOP2__S_SUB_U32 - - // D.u = S0.u - S1.u; - // SCC = (S1.u > S0.u ? 1 : 0) is an unsigned overflow or carry-out. - void - Inst_SOP2__S_SUB_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() - src1.rawData(); - scc = (src1.rawData() > src0.rawData()) ? 1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP2__S_ADD_I32::Inst_SOP2__S_ADD_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_add_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_ADD_I32 - - Inst_SOP2__S_ADD_I32::~Inst_SOP2__S_ADD_I32() - { - } // ~Inst_SOP2__S_ADD_I32 - - // D.i = S0.i + S1.i; - // SCC = (S0.u[31] == S1.u[31] && S0.u[31] != D.u[31]) is a signed - // overflow. - void - Inst_SOP2__S_ADD_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() + src1.rawData(); - scc = (bits(src0.rawData(), 31) == bits(src1.rawData(), 31) - && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) - ? 
1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP2__S_SUB_I32::Inst_SOP2__S_SUB_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_sub_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_SUB_I32 - - Inst_SOP2__S_SUB_I32::~Inst_SOP2__S_SUB_I32() - { - } // ~Inst_SOP2__S_SUB_I32 - - // D.i = S0.i - S1.i; - // SCC = (S0.u[31] != S1.u[31] && S0.u[31] != D.u[31]) is a signed - // overflow. - void - Inst_SOP2__S_SUB_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() - src1.rawData(); - scc = (bits(src0.rawData(), 31) != bits(src1.rawData(), 31) - && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP2__S_ADDC_U32::Inst_SOP2__S_ADDC_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_addc_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_ADDC_U32 - - Inst_SOP2__S_ADDC_U32::~Inst_SOP2__S_ADDC_U32() - { - } // ~Inst_SOP2__S_ADDC_U32 - - // D.u = S0.u + S1.u + SCC; - // SCC = (S0.u + S1.u + SCC >= 0x100000000ULL ? 1 : 0) is an unsigned - // overflow. - void - Inst_SOP2__S_ADDC_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - scc.read(); - - sdst = src0.rawData() + src1.rawData() + scc.rawData(); - scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData() - + (ScalarRegU64)scc.rawData()) >= 0x100000000ULL ? 
1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP2__S_SUBB_U32::Inst_SOP2__S_SUBB_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_subb_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_SUBB_U32 - - Inst_SOP2__S_SUBB_U32::~Inst_SOP2__S_SUBB_U32() - { - } // ~Inst_SOP2__S_SUBB_U32 - - // D.u = S0.u - S1.u - SCC; - // SCC = (S1.u + SCC > S0.u ? 1 : 0) is an unsigned overflow. - void - Inst_SOP2__S_SUBB_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - scc.read(); - - sdst = src0.rawData() - src1.rawData() - scc.rawData(); - scc = (src1.rawData() + scc.rawData()) > src0.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP2__S_MIN_I32::Inst_SOP2__S_MIN_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_min_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_MIN_I32 - - Inst_SOP2__S_MIN_I32::~Inst_SOP2__S_MIN_I32() - { - } // ~Inst_SOP2__S_MIN_I32 - - // D.i = (S0.i < S1.i) ? S0.i : S1.i; - // SCC = 1 if S0 is chosen as the minimum value. - void - Inst_SOP2__S_MIN_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = std::min(src0.rawData(), src1.rawData()); - scc = (src0.rawData() < src1.rawData()) ? 1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP2__S_MIN_U32::Inst_SOP2__S_MIN_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_min_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_MIN_U32 - - Inst_SOP2__S_MIN_U32::~Inst_SOP2__S_MIN_U32() - { - } // ~Inst_SOP2__S_MIN_U32 - - // D.u = (S0.u < S1.u) ? S0.u : S1.u; - // SCC = 1 if S0 is chosen as the minimum value. 
- void - Inst_SOP2__S_MIN_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = std::min(src0.rawData(), src1.rawData()); - scc = (src0.rawData() < src1.rawData()) ? 1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP2__S_MAX_I32::Inst_SOP2__S_MAX_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_max_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_MAX_I32 - - Inst_SOP2__S_MAX_I32::~Inst_SOP2__S_MAX_I32() - { - } // ~Inst_SOP2__S_MAX_I32 - - // D.i = (S0.i > S1.i) ? S0.i : S1.i; - // SCC = 1 if S0 is chosen as the maximum value. - void - Inst_SOP2__S_MAX_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = std::max(src0.rawData(), src1.rawData()); - scc = (src0.rawData() > src1.rawData()) ? 1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP2__S_MAX_U32::Inst_SOP2__S_MAX_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_max_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_MAX_U32 - - Inst_SOP2__S_MAX_U32::~Inst_SOP2__S_MAX_U32() - { - } // ~Inst_SOP2__S_MAX_U32 - - // D.u = (S0.u > S1.u) ? S0.u : S1.u; - // SCC = 1 if S0 is chosen as the maximum value. - void - Inst_SOP2__S_MAX_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = std::max(src0.rawData(), src1.rawData()); - scc = (src0.rawData() > src1.rawData()) ? 
1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP2__S_CSELECT_B32::Inst_SOP2__S_CSELECT_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_cselect_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_CSELECT_B32 - - Inst_SOP2__S_CSELECT_B32::~Inst_SOP2__S_CSELECT_B32() - { - } // ~Inst_SOP2__S_CSELECT_B32 - - // D.u = SCC ? S0.u : S1.u (conditional select). - void - Inst_SOP2__S_CSELECT_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - scc.read(); - - sdst = scc.rawData() ? src0.rawData() : src1.rawData(); - - sdst.write(); - } - - Inst_SOP2__S_CSELECT_B64::Inst_SOP2__S_CSELECT_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_cselect_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_CSELECT_B64 - - Inst_SOP2__S_CSELECT_B64::~Inst_SOP2__S_CSELECT_B64() - { - } // ~Inst_SOP2__S_CSELECT_B64 - - // D.u64 = SCC ? S0.u64 : S1.u64 (conditional select). - void - Inst_SOP2__S_CSELECT_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - scc.read(); - - sdst = scc.rawData() ? src0.rawData() : src1.rawData(); - - sdst.write(); - } - - Inst_SOP2__S_AND_B32::Inst_SOP2__S_AND_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_and_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_AND_B32 - - Inst_SOP2__S_AND_B32::~Inst_SOP2__S_AND_B32() - { - } // ~Inst_SOP2__S_AND_B32 - - // D.u = S0.u & S1.u; - // SCC = 1 if result is non-zero. 
    void
    Inst_SOP2__S_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() & src1.rawData();
        // SCC reflects the freshly-assigned destination value; sdst must be
        // assigned before this line reads it back via rawData().
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_AND_B64::Inst_SOP2__S_AND_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_and_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_AND_B64

    Inst_SOP2__S_AND_B64::~Inst_SOP2__S_AND_B64()
    {
    } // ~Inst_SOP2__S_AND_B64

    // D.u64 = S0.u64 & S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_AND_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() & src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_OR_B32::Inst_SOP2__S_OR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_or_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_OR_B32

    Inst_SOP2__S_OR_B32::~Inst_SOP2__S_OR_B32()
    {
    } // ~Inst_SOP2__S_OR_B32

    // D.u = S0.u | S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() | src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_OR_B64::Inst_SOP2__S_OR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_or_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_OR_B64

    Inst_SOP2__S_OR_B64::~Inst_SOP2__S_OR_B64()
    {
    } // ~Inst_SOP2__S_OR_B64

    // D.u64 = S0.u64 | S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_OR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() | src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_XOR_B32::Inst_SOP2__S_XOR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xor_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XOR_B32

    Inst_SOP2__S_XOR_B32::~Inst_SOP2__S_XOR_B32()
    {
    } // ~Inst_SOP2__S_XOR_B32

    // D.u = S0.u ^ S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() ^ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_XOR_B64::Inst_SOP2__S_XOR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xor_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XOR_B64

    Inst_SOP2__S_XOR_B64::~Inst_SOP2__S_XOR_B64()
    {
    } // ~Inst_SOP2__S_XOR_B64

    // D.u64 = S0.u64 ^ S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() ^ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ANDN2_B32::Inst_SOP2__S_ANDN2_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_andn2_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ANDN2_B32

    Inst_SOP2__S_ANDN2_B32::~Inst_SOP2__S_ANDN2_B32()
    {
    } // ~Inst_SOP2__S_ANDN2_B32

    // D.u = S0.u & ~S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ANDN2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        // "and-not": second operand is bitwise-complemented before the AND.
        sdst = src0.rawData() &~ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ANDN2_B64::Inst_SOP2__S_ANDN2_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_andn2_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ANDN2_B64

    Inst_SOP2__S_ANDN2_B64::~Inst_SOP2__S_ANDN2_B64()
    {
    } // ~Inst_SOP2__S_ANDN2_B64

    // D.u64 = S0.u64 & ~S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ANDN2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() &~ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ORN2_B32::Inst_SOP2__S_ORN2_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_orn2_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ORN2_B32

    Inst_SOP2__S_ORN2_B32::~Inst_SOP2__S_ORN2_B32()
    {
    } // ~Inst_SOP2__S_ORN2_B32

    // D.u = S0.u | ~S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ORN2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() |~ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ORN2_B64::Inst_SOP2__S_ORN2_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_orn2_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ORN2_B64

    Inst_SOP2__S_ORN2_B64::~Inst_SOP2__S_ORN2_B64()
    {
    } // ~Inst_SOP2__S_ORN2_B64

    // D.u64 = S0.u64 | ~S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ORN2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() |~ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_NAND_B32::Inst_SOP2__S_NAND_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nand_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NAND_B32

    Inst_SOP2__S_NAND_B32::~Inst_SOP2__S_NAND_B32()
    {
    } // ~Inst_SOP2__S_NAND_B32

    // D.u = ~(S0.u & S1.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NAND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() & src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_NAND_B64::Inst_SOP2__S_NAND_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nand_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NAND_B64

    Inst_SOP2__S_NAND_B64::~Inst_SOP2__S_NAND_B64()
    {
    } // ~Inst_SOP2__S_NAND_B64

    // D.u64 = ~(S0.u64 & S1.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NAND_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() & src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_NOR_B32::Inst_SOP2__S_NOR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nor_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NOR_B32

    Inst_SOP2__S_NOR_B32::~Inst_SOP2__S_NOR_B32()
    {
    } // ~Inst_SOP2__S_NOR_B32

    // D.u = ~(S0.u | S1.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() | src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_NOR_B64::Inst_SOP2__S_NOR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nor_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NOR_B64

    Inst_SOP2__S_NOR_B64::~Inst_SOP2__S_NOR_B64()
    {
    } // ~Inst_SOP2__S_NOR_B64

    // D.u64 = ~(S0.u64 | S1.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() | src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_XNOR_B32::Inst_SOP2__S_XNOR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xnor_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XNOR_B32

    Inst_SOP2__S_XNOR_B32::~Inst_SOP2__S_XNOR_B32()
    {
    } // ~Inst_SOP2__S_XNOR_B32

    // D.u = ~(S0.u ^ S1.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XNOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() ^ src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_XNOR_B64::Inst_SOP2__S_XNOR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xnor_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XNOR_B64

    Inst_SOP2__S_XNOR_B64::~Inst_SOP2__S_XNOR_B64()
    {
    } // ~Inst_SOP2__S_XNOR_B64

    // D.u64 = ~(S0.u64 ^ S1.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XNOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() ^ src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_LSHL_B32::Inst_SOP2__S_LSHL_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshl_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHL_B32

    Inst_SOP2__S_LSHL_B32::~Inst_SOP2__S_LSHL_B32()
    {
    } // ~Inst_SOP2__S_LSHL_B32

    // D.u = S0.u << S1.u[4:0];
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_LSHL_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        // Only the low 5 bits of S1 form the shift amount (max shift 31),
        // so the shift is always in range for a 32-bit operand.
        sdst = (src0.rawData() << bits(src1.rawData(), 4, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_LSHL_B64::Inst_SOP2__S_LSHL_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshl_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHL_B64

    Inst_SOP2__S_LSHL_B64::~Inst_SOP2__S_LSHL_B64()
    {
    } // ~Inst_SOP2__S_LSHL_B64

    // D.u64 = S0.u64 << S1.u[5:0];
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_LSHL_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        // Low 6 bits of S1 (max shift 63) for the 64-bit operand.
        sdst = (src0.rawData() << bits(src1.rawData(), 5, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_LSHR_B32::Inst_SOP2__S_LSHR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshr_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHR_B32

    Inst_SOP2__S_LSHR_B32::~Inst_SOP2__S_LSHR_B32()
    {
    } // ~Inst_SOP2__S_LSHR_B32

    // D.u = S0.u >> S1.u[4:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to zero.
    void
    Inst_SOP2__S_LSHR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        // Unsigned operand type gives the required logical (zero-fill) shift.
        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_LSHR_B64::Inst_SOP2__S_LSHR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshr_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHR_B64

    Inst_SOP2__S_LSHR_B64::~Inst_SOP2__S_LSHR_B64()
    {
    } // ~Inst_SOP2__S_LSHR_B64

    // D.u64 = S0.u64 >> S1.u[5:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to zero.
    void
    Inst_SOP2__S_LSHR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ASHR_I32::Inst_SOP2__S_ASHR_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_ashr_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ASHR_I32

    Inst_SOP2__S_ASHR_I32::~Inst_SOP2__S_ASHR_I32()
    {
    } // ~Inst_SOP2__S_ASHR_I32

    // D.i = signext(S0.i) >> S1.u[4:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_SOP2__S_ASHR_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        // Signed operand type: '>>' performs the arithmetic (sign-filling)
        // shift the instruction requires on the compilers gem5 supports.
        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ASHR_I64::Inst_SOP2__S_ASHR_I64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_ashr_i64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ASHR_I64

    Inst_SOP2__S_ASHR_I64::~Inst_SOP2__S_ASHR_I64()
    {
    } // ~Inst_SOP2__S_ASHR_I64

    // D.i64 = signext(S0.i64) >> S1.u[5:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_SOP2__S_ASHR_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_BFM_B32::Inst_SOP2__S_BFM_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfm_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFM_B32

    Inst_SOP2__S_BFM_B32::~Inst_SOP2__S_BFM_B32()
    {
    } // ~Inst_SOP2__S_BFM_B32

    // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0] (bitfield mask).
    // Does not write SCC.
    void
    Inst_SOP2__S_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src0.read();
        src1.read();

        // Width and offset are both clamped to 5 bits, so the shifts stay
        // within the 32-bit operand.
        sdst = ((1 << bits(src0.rawData(), 4, 0)) - 1)
            << bits(src1.rawData(), 4, 0);

        sdst.write();
    }

    Inst_SOP2__S_BFM_B64::Inst_SOP2__S_BFM_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfm_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFM_B64

    Inst_SOP2__S_BFM_B64::~Inst_SOP2__S_BFM_B64()
    {
    } // ~Inst_SOP2__S_BFM_B64

    // D.u64 = ((1ULL << S0.u[5:0]) - 1) << S1.u[5:0] (bitfield mask).
    // Does not write SCC.
    void
    Inst_SOP2__S_BFM_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src0.read();
        src1.read();

        // 1ULL keeps the mask computation in 64 bits (fields are 6-bit).
        sdst = ((1ULL << bits(src0.rawData(), 5, 0)) - 1)
            << bits(src1.rawData(), 5, 0);

        sdst.write();
    }

    Inst_SOP2__S_MUL_I32::Inst_SOP2__S_MUL_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_mul_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MUL_I32

    Inst_SOP2__S_MUL_I32::~Inst_SOP2__S_MUL_I32()
    {
    } // ~Inst_SOP2__S_MUL_I32

    // D.i = S0.i * S1.i (low 32 bits of the product; does not write SCC).
    void
    Inst_SOP2__S_MUL_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src0.read();
        src1.read();

        sdst = src0.rawData() * src1.rawData();

        sdst.write();
    }

    Inst_SOP2__S_BFE_U32::Inst_SOP2__S_BFE_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfe_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFE_U32

    Inst_SOP2__S_BFE_U32::~Inst_SOP2__S_BFE_U32()
    {
    } // ~Inst_SOP2__S_BFE_U32

    // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
    // field width.
- // D.u = (S0.u >> S1.u[4:0]) & ((1 << S1.u[22:16]) - 1); - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_BFE_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)) - & ((1 << bits(src1.rawData(), 22, 16)) - 1); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP2__S_BFE_I32::Inst_SOP2__S_BFE_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_bfe_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_BFE_I32 - - Inst_SOP2__S_BFE_I32::~Inst_SOP2__S_BFE_I32() - { - } // ~Inst_SOP2__S_BFE_I32 - - // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is - // field width. - // D.i = (S0.i >> S1.u[4:0]) & ((1 << S1.u[22:16]) - 1); - // Sign-extend the result; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_BFE_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)) - & ((1 << bits(src1.rawData(), 22, 16)) - 1); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP2__S_BFE_U64::Inst_SOP2__S_BFE_U64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_bfe_u64") - { - setFlag(ALU); - } // Inst_SOP2__S_BFE_U64 - - Inst_SOP2__S_BFE_U64::~Inst_SOP2__S_BFE_U64() - { - } // ~Inst_SOP2__S_BFE_U64 - - // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is - // field width. - // D.u64 = (S0.u64 >> S1.u[5:0]) & ((1 << S1.u[22:16]) - 1); - // SCC = 1 if result is non-zero. 
- void - Inst_SOP2__S_BFE_U64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)) - & ((1 << bits(src1.rawData(), 22, 16)) - 1); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP2__S_BFE_I64::Inst_SOP2__S_BFE_I64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_bfe_i64") - { - setFlag(ALU); - } // Inst_SOP2__S_BFE_I64 - - Inst_SOP2__S_BFE_I64::~Inst_SOP2__S_BFE_I64() - { - } // ~Inst_SOP2__S_BFE_I64 - - // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is - // field width. - // D.i64 = (S0.i64 >> S1.u[5:0]) & ((1 << S1.u[22:16]) - 1); - // Sign-extend result; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_BFE_I64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)) - & ((1 << bits(src1.rawData(), 22, 16)) - 1); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP2__S_CBRANCH_G_FORK::Inst_SOP2__S_CBRANCH_G_FORK(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_cbranch_g_fork") - { - setFlag(Branch); - } // Inst_SOP2__S_CBRANCH_G_FORK - - Inst_SOP2__S_CBRANCH_G_FORK::~Inst_SOP2__S_CBRANCH_G_FORK() - { - } // ~Inst_SOP2__S_CBRANCH_G_FORK - - // Conditional branch using branch-stack. - // S0 = compare mask(vcc or any sgpr) and - // S1 = 64-bit byte address of target instruction. 
- void - Inst_SOP2__S_CBRANCH_G_FORK::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOP2__S_ABSDIFF_I32::Inst_SOP2__S_ABSDIFF_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_absdiff_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_ABSDIFF_I32 - - Inst_SOP2__S_ABSDIFF_I32::~Inst_SOP2__S_ABSDIFF_I32() - { - } // ~Inst_SOP2__S_ABSDIFF_I32 - - // D.i = S0.i - S1.i; - // if (D.i < 0) then D.i = -D.i; - // SCC = 1 if result is non-zero. - // Compute the absolute value of difference between two values. - void - Inst_SOP2__S_ABSDIFF_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - sdst = std::abs(src0.rawData() - src1.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP2__S_RFE_RESTORE_B64::Inst_SOP2__S_RFE_RESTORE_B64( - InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_rfe_restore_b64") - { - } // Inst_SOP2__S_RFE_RESTORE_B64 - - Inst_SOP2__S_RFE_RESTORE_B64::~Inst_SOP2__S_RFE_RESTORE_B64() - { - } // ~Inst_SOP2__S_RFE_RESTORE_B64 - - // Return from exception handler and continue. - void - Inst_SOP2__S_RFE_RESTORE_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPK__S_MOVK_I32::Inst_SOPK__S_MOVK_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_movk_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_MOVK_I32 - - Inst_SOPK__S_MOVK_I32::~Inst_SOPK__S_MOVK_I32() - { - } // ~Inst_SOPK__S_MOVK_I32 - - // D.i = signext(SIMM16) (sign extension). 
- void - Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - sdst = simm16; - - sdst.write(); - } - - Inst_SOPK__S_CMOVK_I32::Inst_SOPK__S_CMOVK_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmovk_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMOVK_I32 - - Inst_SOPK__S_CMOVK_I32::~Inst_SOPK__S_CMOVK_I32() - { - } // ~Inst_SOPK__S_CMOVK_I32 - - // if (SCC) then D.i = signext(SIMM16); - // else NOP. - // Conditional move with sign extension. - void - Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); - - scc.read(); - - if (scc.rawData()) { - sdst = simm16; - sdst.write(); - } - } - - Inst_SOPK__S_CMPK_EQ_I32::Inst_SOPK__S_CMPK_EQ_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_eq_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_EQ_I32 - - Inst_SOPK__S_CMPK_EQ_I32::~Inst_SOPK__S_CMPK_EQ_I32() - { - } // ~Inst_SOPK__S_CMPK_EQ_I32 - - // SCC = (S0.i == signext(SIMM16)). - void - Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() == simm16) ? 1 : 0; - - scc.write(); - } - - Inst_SOPK__S_CMPK_LG_I32::Inst_SOPK__S_CMPK_LG_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_lg_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LG_I32 - - Inst_SOPK__S_CMPK_LG_I32::~Inst_SOPK__S_CMPK_LG_I32() - { - } // ~Inst_SOPK__S_CMPK_LG_I32 - - // SCC = (S0.i != signext(SIMM16)). 
- void - Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() != simm16) ? 1 : 0; - - scc.write(); - } - - Inst_SOPK__S_CMPK_GT_I32::Inst_SOPK__S_CMPK_GT_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_gt_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_GT_I32 - - Inst_SOPK__S_CMPK_GT_I32::~Inst_SOPK__S_CMPK_GT_I32() - { - } // ~Inst_SOPK__S_CMPK_GT_I32 - - // SCC = (S0.i > signext(SIMM16)). - void - Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() > simm16) ? 1 : 0; - - scc.write(); - } - - Inst_SOPK__S_CMPK_GE_I32::Inst_SOPK__S_CMPK_GE_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_ge_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_GE_I32 - - Inst_SOPK__S_CMPK_GE_I32::~Inst_SOPK__S_CMPK_GE_I32() - { - } // ~Inst_SOPK__S_CMPK_GE_I32 - - // SCC = (S0.i >= signext(SIMM16)). - void - Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() >= simm16) ? 1 : 0; - - scc.write(); - } - - Inst_SOPK__S_CMPK_LT_I32::Inst_SOPK__S_CMPK_LT_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_lt_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LT_I32 - - Inst_SOPK__S_CMPK_LT_I32::~Inst_SOPK__S_CMPK_LT_I32() - { - } // ~Inst_SOPK__S_CMPK_LT_I32 - - // SCC = (S0.i < signext(SIMM16)). 
- void - Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() < simm16) ? 1 : 0; - - scc.write(); - } - - Inst_SOPK__S_CMPK_LE_I32::Inst_SOPK__S_CMPK_LE_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_le_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LE_I32 - - Inst_SOPK__S_CMPK_LE_I32::~Inst_SOPK__S_CMPK_LE_I32() - { - } // ~Inst_SOPK__S_CMPK_LE_I32 - - // SCC = (S0.i <= signext(SIMM16)). - void - Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() <= simm16) ? 1 : 0; - - scc.write(); - } - - Inst_SOPK__S_CMPK_EQ_U32::Inst_SOPK__S_CMPK_EQ_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_eq_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_EQ_U32 - - Inst_SOPK__S_CMPK_EQ_U32::~Inst_SOPK__S_CMPK_EQ_U32() - { - } // ~Inst_SOPK__S_CMPK_EQ_U32 - - // SCC = (S0.u == SIMM16). - void - Inst_SOPK__S_CMPK_EQ_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() == simm16) ? 1 : 0; - - scc.write(); - } - - Inst_SOPK__S_CMPK_LG_U32::Inst_SOPK__S_CMPK_LG_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_lg_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LG_U32 - - Inst_SOPK__S_CMPK_LG_U32::~Inst_SOPK__S_CMPK_LG_U32() - { - } // ~Inst_SOPK__S_CMPK_LG_U32 - - // SCC = (S0.u != SIMM16). 
- void - Inst_SOPK__S_CMPK_LG_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() != simm16) ? 1 : 0; - - scc.write(); - } - - Inst_SOPK__S_CMPK_GT_U32::Inst_SOPK__S_CMPK_GT_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_gt_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_GT_U32 - - Inst_SOPK__S_CMPK_GT_U32::~Inst_SOPK__S_CMPK_GT_U32() - { - } // ~Inst_SOPK__S_CMPK_GT_U32 - - // SCC = (S0.u > SIMM16). - void - Inst_SOPK__S_CMPK_GT_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() > simm16) ? 1 : 0; - - scc.write(); - } - - Inst_SOPK__S_CMPK_GE_U32::Inst_SOPK__S_CMPK_GE_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_ge_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_GE_U32 - - Inst_SOPK__S_CMPK_GE_U32::~Inst_SOPK__S_CMPK_GE_U32() - { - } // ~Inst_SOPK__S_CMPK_GE_U32 - - // SCC = (S0.u >= SIMM16). - void - Inst_SOPK__S_CMPK_GE_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() >= simm16) ? 1 : 0; - - scc.write(); - } - - Inst_SOPK__S_CMPK_LT_U32::Inst_SOPK__S_CMPK_LT_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_lt_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LT_U32 - - Inst_SOPK__S_CMPK_LT_U32::~Inst_SOPK__S_CMPK_LT_U32() - { - } // ~Inst_SOPK__S_CMPK_LT_U32 - - // SCC = (S0.u < SIMM16). 
- void - Inst_SOPK__S_CMPK_LT_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() < simm16) ? 1 : 0; - - scc.write(); - } - - Inst_SOPK__S_CMPK_LE_U32::Inst_SOPK__S_CMPK_LE_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_le_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LE_U32 - - Inst_SOPK__S_CMPK_LE_U32::~Inst_SOPK__S_CMPK_LE_U32() - { - } // ~Inst_SOPK__S_CMPK_LE_U32 - - // SCC = (S0.u <= SIMM16). - void - Inst_SOPK__S_CMPK_LE_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() <= simm16) ? 1 : 0; - - scc.write(); - } - - Inst_SOPK__S_ADDK_I32::Inst_SOPK__S_ADDK_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_addk_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_ADDK_I32 - - Inst_SOPK__S_ADDK_I32::~Inst_SOPK__S_ADDK_I32() - { - } // ~Inst_SOPK__S_ADDK_I32 - - // D.i = D.i + signext(SIMM16); - // SCC = overflow. - void - Inst_SOPK__S_ADDK_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI16 simm16 = instData.SIMM16; - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16); - scc = (bits(src.rawData(), 31) == bits(simm16, 15) - && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOPK__S_MULK_I32::Inst_SOPK__S_MULK_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_mulk_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_MULK_I32 - - Inst_SOPK__S_MULK_I32::~Inst_SOPK__S_MULK_I32() - { - } // ~Inst_SOPK__S_MULK_I32 - - // D.i = D.i * signext(SIMM16). 
void
Inst_SOPK__S_MULK_I32::execute(GPUDynInstPtr gpuDynInst)
{
    ScalarRegI16 simm16 = instData.SIMM16;
    // Read-modify-write of the SDST register; no SCC update for s_mulk.
    ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

    sdst.read();

    sdst = sdst.rawData() * (ScalarRegI32)sext<16>(simm16);

    sdst.write();
}

Inst_SOPK__S_CBRANCH_I_FORK::Inst_SOPK__S_CBRANCH_I_FORK(InFmt_SOPK *iFmt)
    : Inst_SOPK(iFmt, "s_cbranch_i_fork")
{
    setFlag(Branch);
} // Inst_SOPK__S_CBRANCH_I_FORK

Inst_SOPK__S_CBRANCH_I_FORK::~Inst_SOPK__S_CBRANCH_I_FORK()
{
} // ~Inst_SOPK__S_CBRANCH_I_FORK

// Conditional branch using branch-stack.
// S0 = compare mask(vcc or any sgpr), and
// SIMM16 = signed DWORD branch offset relative to next instruction.
// Not implemented in this model.
void
Inst_SOPK__S_CBRANCH_I_FORK::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
}

Inst_SOPK__S_GETREG_B32::Inst_SOPK__S_GETREG_B32(InFmt_SOPK *iFmt)
    : Inst_SOPK(iFmt, "s_getreg_b32")
{
} // Inst_SOPK__S_GETREG_B32

Inst_SOPK__S_GETREG_B32::~Inst_SOPK__S_GETREG_B32()
{
} // ~Inst_SOPK__S_GETREG_B32

// D.u = hardware-reg. Read some or all of a hardware register into the
// LSBs of D.
// SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
// is 1..32.
// Not implemented in this model.
void
Inst_SOPK__S_GETREG_B32::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
}

Inst_SOPK__S_SETREG_B32::Inst_SOPK__S_SETREG_B32(InFmt_SOPK *iFmt)
    : Inst_SOPK(iFmt, "s_setreg_b32")
{
    setFlag(ALU);
} // Inst_SOPK__S_SETREG_B32

Inst_SOPK__S_SETREG_B32::~Inst_SOPK__S_SETREG_B32()
{
} // ~Inst_SOPK__S_SETREG_B32

// hardware-reg = S0.u. Write some or all of the LSBs of D into a hardware
// register.
// SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
// is 1..32.
void
Inst_SOPK__S_SETREG_B32::execute(GPUDynInstPtr gpuDynInst)
{
    // Unpack SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}.
    ScalarRegI16 simm16 = instData.SIMM16;
    ScalarRegU32 hwregId = simm16 & 0x3f;
    ScalarRegU32 offset = (simm16 >> 6) & 31;
    ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;

    // NOTE(review): the hardware-register id is used directly as a scalar
    // operand index here — presumably the model aliases hw regs onto the
    // scalar operand space; confirm against the operand mapping.
    ScalarOperandU32 hwreg(gpuDynInst, hwregId);
    ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
    hwreg.read();
    sdst.read();

    // Store value from SDST to part of the hardware register.
    ScalarRegU32 mask = (((1U << size) - 1U) << offset);
    hwreg = ((hwreg.rawData() & ~mask)
                    | ((sdst.rawData() << offset) & mask));
    hwreg.write();

    // set MODE register to control the behavior of single precision
    // floating-point numbers: denormal mode or round mode
    if (hwregId==1 && size==2
        && (offset==4 || offset==0)) {
        warn_once("Be cautious that s_setreg_b32 has no real effect "
                        "on FP modes: %s\n", gpuDynInst->disassemble());
        return;
    }

    // panic if not changing MODE of floating-point numbers
    panicUnimplemented();
}

Inst_SOPK__S_SETREG_IMM32_B32::Inst_SOPK__S_SETREG_IMM32_B32(
        InFmt_SOPK *iFmt)
    : Inst_SOPK(iFmt, "s_setreg_imm32_b32")
{
    setFlag(ALU);
} // Inst_SOPK__S_SETREG_IMM32_B32

Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32()
{
} // ~Inst_SOPK__S_SETREG_IMM32_B32

// Write some or all of the LSBs of IMM32 into a hardware register; this
// instruction requires a 32-bit literal constant.
// SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
// is 1..32.
void
Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst)
{
    // Unpack SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}.
    ScalarRegI16 simm16 = instData.SIMM16;
    ScalarRegU32 hwregId = simm16 & 0x3f;
    ScalarRegU32 offset = (simm16 >> 6) & 31;
    ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;

    ScalarOperandU32 hwreg(gpuDynInst, hwregId);
    // The source comes from the 32-bit literal that follows the
    // instruction word, not from a register.
    ScalarRegU32 simm32 = extData.imm_u32;
    hwreg.read();

    // Merge the literal's low `size` bits into the register at `offset`.
    ScalarRegU32 mask = (((1U << size) - 1U) << offset);
    hwreg = ((hwreg.rawData() & ~mask)
                    | ((simm32 << offset) & mask));
    hwreg.write();

    // Writes that target the FP MODE bits are tolerated with a warning;
    // anything else is unimplemented.
    if (hwregId==1 && size==2
        && (offset==4 || offset==0)) {
        warn_once("Be cautious that s_setreg_imm32_b32 has no real effect "
                        "on FP modes: %s\n", gpuDynInst->disassemble());
        return;
    }

    // panic if not changing MODE of floating-point numbers
    panicUnimplemented();
}

Inst_SOP1__S_MOV_B32::Inst_SOP1__S_MOV_B32(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_mov_b32")
{
    setFlag(ALU);
} // Inst_SOP1__S_MOV_B32

Inst_SOP1__S_MOV_B32::~Inst_SOP1__S_MOV_B32()
{
} // ~Inst_SOP1__S_MOV_B32

// D.u = S0.u.
void
Inst_SOP1__S_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

    src.read();

    sdst = src.rawData();

    sdst.write();
}

Inst_SOP1__S_MOV_B64::Inst_SOP1__S_MOV_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_mov_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_MOV_B64

Inst_SOP1__S_MOV_B64::~Inst_SOP1__S_MOV_B64()
{
} // ~Inst_SOP1__S_MOV_B64

// D.u64 = S0.u64.
void
Inst_SOP1__S_MOV_B64::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

    src.read();

    sdst = src.rawData();

    sdst.write();
}

Inst_SOP1__S_CMOV_B32::Inst_SOP1__S_CMOV_B32(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_cmov_b32")
{
    setFlag(ALU);
} // Inst_SOP1__S_CMOV_B32

Inst_SOP1__S_CMOV_B32::~Inst_SOP1__S_CMOV_B32()
{
} // ~Inst_SOP1__S_CMOV_B32

// if (SCC) then D.u = S0.u;
// else NOP.
// Conditional move.
void
Inst_SOP1__S_CMOV_B32::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();
    scc.read();

    // Destination is only written when SCC is set.
    if (scc.rawData()) {
        sdst = src.rawData();
        sdst.write();
    }
}

Inst_SOP1__S_CMOV_B64::Inst_SOP1__S_CMOV_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_cmov_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_CMOV_B64

Inst_SOP1__S_CMOV_B64::~Inst_SOP1__S_CMOV_B64()
{
} // ~Inst_SOP1__S_CMOV_B64

// if (SCC) then D.u64 = S0.u64;
// else NOP.
// Conditional move.
void
Inst_SOP1__S_CMOV_B64::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();
    scc.read();

    if (scc.rawData()) {
        sdst = src.rawData();
        sdst.write();
    }
}

Inst_SOP1__S_NOT_B32::Inst_SOP1__S_NOT_B32(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_not_b32")
{
    setFlag(ALU);
} // Inst_SOP1__S_NOT_B32

Inst_SOP1__S_NOT_B32::~Inst_SOP1__S_NOT_B32()
{
} // ~Inst_SOP1__S_NOT_B32

// D.u = ~S0.u;
// SCC = 1 if result is non-zero.
// Bitwise negation.
void
Inst_SOP1__S_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    sdst = ~src.rawData();

    scc = sdst.rawData() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_NOT_B64::Inst_SOP1__S_NOT_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_not_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_NOT_B64

Inst_SOP1__S_NOT_B64::~Inst_SOP1__S_NOT_B64()
{
} // ~Inst_SOP1__S_NOT_B64

// D.u64 = ~S0.u64;
// SCC = 1 if result is non-zero.
// Bitwise negation.
void
Inst_SOP1__S_NOT_B64::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    sdst = ~src.rawData();
    scc = sdst.rawData() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_WQM_B32::Inst_SOP1__S_WQM_B32(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_wqm_b32")
{
    setFlag(ALU);
} // Inst_SOP1__S_WQM_B32

Inst_SOP1__S_WQM_B32::~Inst_SOP1__S_WQM_B32()
{
} // ~Inst_SOP1__S_WQM_B32

// Computes whole quad mode for an active/valid mask.
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_WQM_B32::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    // wholeQuadMode() is the shared helper implementing WQM semantics.
    sdst = wholeQuadMode(src.rawData());
    scc = sdst.rawData() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_WQM_B64::Inst_SOP1__S_WQM_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_wqm_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_WQM_B64

Inst_SOP1__S_WQM_B64::~Inst_SOP1__S_WQM_B64()
{
} // ~Inst_SOP1__S_WQM_B64

// Computes whole quad mode for an active/valid mask.
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_WQM_B64::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    sdst = wholeQuadMode(src.rawData());
    scc = sdst.rawData() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_BREV_B32::Inst_SOP1__S_BREV_B32(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_brev_b32")
{
    setFlag(ALU);
} // Inst_SOP1__S_BREV_B32

Inst_SOP1__S_BREV_B32::~Inst_SOP1__S_BREV_B32()
{
} // ~Inst_SOP1__S_BREV_B32

// D.u[31:0] = S0.u[0:31] (reverse bits).
// Note: s_brev does not modify SCC.
void
Inst_SOP1__S_BREV_B32::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

    src.read();

    sdst = reverseBits(src.rawData());

    sdst.write();
}

Inst_SOP1__S_BREV_B64::Inst_SOP1__S_BREV_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_brev_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_BREV_B64

Inst_SOP1__S_BREV_B64::~Inst_SOP1__S_BREV_B64()
{
} // ~Inst_SOP1__S_BREV_B64

// D.u64[63:0] = S0.u64[0:63] (reverse bits).
void
Inst_SOP1__S_BREV_B64::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

    src.read();

    sdst = reverseBits(src.rawData());

    sdst.write();
}

Inst_SOP1__S_BCNT0_I32_B32::Inst_SOP1__S_BCNT0_I32_B32(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_bcnt0_i32_b32")
{
    setFlag(ALU);
} // Inst_SOP1__S_BCNT0_I32_B32

Inst_SOP1__S_BCNT0_I32_B32::~Inst_SOP1__S_BCNT0_I32_B32()
{
} // ~Inst_SOP1__S_BCNT0_I32_B32

// D.i = CountZeroBits(S0.u);
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_BCNT0_I32_B32::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
    ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    sdst = countZeroBits(src.rawData());
    scc = sdst.rawData() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_BCNT0_I32_B64::Inst_SOP1__S_BCNT0_I32_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_bcnt0_i32_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_BCNT0_I32_B64

Inst_SOP1__S_BCNT0_I32_B64::~Inst_SOP1__S_BCNT0_I32_B64()
{
} // ~Inst_SOP1__S_BCNT0_I32_B64

// D.i = CountZeroBits(S0.u64);
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_BCNT0_I32_B64::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    sdst = countZeroBits(src.rawData());
    scc = sdst.rawData() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_BCNT1_I32_B32::Inst_SOP1__S_BCNT1_I32_B32(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_bcnt1_i32_b32")
{
    setFlag(ALU);
} // Inst_SOP1__S_BCNT1_I32_B32

Inst_SOP1__S_BCNT1_I32_B32::~Inst_SOP1__S_BCNT1_I32_B32()
{
} // ~Inst_SOP1__S_BCNT1_I32_B32

// D.i = CountOneBits(S0.u);
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_BCNT1_I32_B32::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
    ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    sdst = popCount(src.rawData());
    scc = sdst.rawData() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_BCNT1_I32_B64::Inst_SOP1__S_BCNT1_I32_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_bcnt1_i32_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_BCNT1_I32_B64

Inst_SOP1__S_BCNT1_I32_B64::~Inst_SOP1__S_BCNT1_I32_B64()
{
} // ~Inst_SOP1__S_BCNT1_I32_B64

// D.i = CountOneBits(S0.u64);
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_BCNT1_I32_B64::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    sdst = popCount(src.rawData());
    scc = sdst.rawData() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_FF0_I32_B32::Inst_SOP1__S_FF0_I32_B32(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_ff0_i32_b32")
{
    setFlag(ALU);
} // Inst_SOP1__S_FF0_I32_B32

Inst_SOP1__S_FF0_I32_B32::~Inst_SOP1__S_FF0_I32_B32()
{
} // ~Inst_SOP1__S_FF0_I32_B32

// D.i = FindFirstZero(S0.u);
// If no zeros are found, return -1.
// Returns the bit position of the first zero from the LSB.
void
Inst_SOP1__S_FF0_I32_B32::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
    ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

    src.read();

    sdst = findFirstZero(src.rawData());

    sdst.write();
}

Inst_SOP1__S_FF0_I32_B64::Inst_SOP1__S_FF0_I32_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_ff0_i32_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_FF0_I32_B64

Inst_SOP1__S_FF0_I32_B64::~Inst_SOP1__S_FF0_I32_B64()
{
} // ~Inst_SOP1__S_FF0_I32_B64

// D.i = FindFirstZero(S0.u64);
// If no zeros are found, return -1.
// Returns the bit position of the first zero from the LSB.
void
Inst_SOP1__S_FF0_I32_B64::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

    src.read();

    sdst = findFirstZero(src.rawData());

    sdst.write();
}

Inst_SOP1__S_FF1_I32_B32::Inst_SOP1__S_FF1_I32_B32(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_ff1_i32_b32")
{
    setFlag(ALU);
} // Inst_SOP1__S_FF1_I32_B32

Inst_SOP1__S_FF1_I32_B32::~Inst_SOP1__S_FF1_I32_B32()
{
} // ~Inst_SOP1__S_FF1_I32_B32

// D.i = FindFirstOne(S0.u);
// If no ones are found, return -1.
// Returns the bit position of the first one from the LSB.
void
Inst_SOP1__S_FF1_I32_B32::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
    ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

    src.read();

    sdst = findFirstOne(src.rawData());

    sdst.write();
}

Inst_SOP1__S_FF1_I32_B64::Inst_SOP1__S_FF1_I32_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_ff1_i32_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_FF1_I32_B64

Inst_SOP1__S_FF1_I32_B64::~Inst_SOP1__S_FF1_I32_B64()
{
} // ~Inst_SOP1__S_FF1_I32_B64

// D.i = FindFirstOne(S0.u64);
// If no ones are found, return -1.
// Returns the bit position of the first one from the LSB.
void
Inst_SOP1__S_FF1_I32_B64::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

    src.read();

    sdst = findFirstOne(src.rawData());

    sdst.write();
}

Inst_SOP1__S_FLBIT_I32_B32::Inst_SOP1__S_FLBIT_I32_B32(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_flbit_i32_b32")
{
    setFlag(ALU);
} // Inst_SOP1__S_FLBIT_I32_B32

Inst_SOP1__S_FLBIT_I32_B32::~Inst_SOP1__S_FLBIT_I32_B32()
{
} // ~Inst_SOP1__S_FLBIT_I32_B32

// D.i = FindFirstOne(S0.u);
// If no ones are found, return -1.
// Counts how many zeros before the first one starting from the MSB.
void
Inst_SOP1__S_FLBIT_I32_B32::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
    ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

    src.read();

    sdst = countZeroBitsMsb(src.rawData());

    sdst.write();
}

Inst_SOP1__S_FLBIT_I32_B64::Inst_SOP1__S_FLBIT_I32_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_flbit_i32_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_FLBIT_I32_B64

Inst_SOP1__S_FLBIT_I32_B64::~Inst_SOP1__S_FLBIT_I32_B64()
{
} // ~Inst_SOP1__S_FLBIT_I32_B64

// D.i = FindFirstOne(S0.u64);
// If no ones are found, return -1.
// Counts how many zeros before the first one starting from the MSB.
void
Inst_SOP1__S_FLBIT_I32_B64::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

    src.read();

    sdst = countZeroBitsMsb(src.rawData());

    sdst.write();
}

Inst_SOP1__S_FLBIT_I32::Inst_SOP1__S_FLBIT_I32(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_flbit_i32")
{
    setFlag(ALU);
} // Inst_SOP1__S_FLBIT_I32

Inst_SOP1__S_FLBIT_I32::~Inst_SOP1__S_FLBIT_I32()
{
} // ~Inst_SOP1__S_FLBIT_I32

// D.i = FirstOppositeSignBit(S0.i);
// If S0.i == 0 or S0.i == -1 (all bits are the same), return -1.
// Counts how many bits in a row (from MSB to LSB) are the same as the
// sign bit.
- void - Inst_SOP1__S_FLBIT_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = firstOppositeSignBit(src.rawData()); - - sdst.write(); - } - - Inst_SOP1__S_FLBIT_I32_I64::Inst_SOP1__S_FLBIT_I32_I64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_flbit_i32_i64") - { - setFlag(ALU); - } // Inst_SOP1__S_FLBIT_I32_I64 - - Inst_SOP1__S_FLBIT_I32_I64::~Inst_SOP1__S_FLBIT_I32_I64() - { - } // ~Inst_SOP1__S_FLBIT_I32_I64 - - // D.i = FirstOppositeSignBit(S0.i64); - // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1. - // Counts how many bits in a row (from MSB to LSB) are the same as the - // sign bit. - void - Inst_SOP1__S_FLBIT_I32_I64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI64 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = firstOppositeSignBit(src.rawData()); - - sdst.write(); - } - - Inst_SOP1__S_SEXT_I32_I8::Inst_SOP1__S_SEXT_I32_I8(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_sext_i32_i8") - { - setFlag(ALU); - } // Inst_SOP1__S_SEXT_I32_I8 - - Inst_SOP1__S_SEXT_I32_I8::~Inst_SOP1__S_SEXT_I32_I8() - { - } // ~Inst_SOP1__S_SEXT_I32_I8 - - // D.i = signext(S0.i[7:0]) (sign extension). - void - Inst_SOP1__S_SEXT_I32_I8::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = sext::digits>( - bits(src.rawData(), 7, 0)); - - sdst.write(); - } - - Inst_SOP1__S_SEXT_I32_I16::Inst_SOP1__S_SEXT_I32_I16(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_sext_i32_i16") - { - setFlag(ALU); - } // Inst_SOP1__S_SEXT_I32_I16 - - Inst_SOP1__S_SEXT_I32_I16::~Inst_SOP1__S_SEXT_I32_I16() - { - } // ~Inst_SOP1__S_SEXT_I32_I16 - - // D.i = signext(S0.i[15:0]) (sign extension). 
- void - Inst_SOP1__S_SEXT_I32_I16::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = sext::digits>( - bits(src.rawData(), 15, 0)); - - sdst.write(); - } - - Inst_SOP1__S_BITSET0_B32::Inst_SOP1__S_BITSET0_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_bitset0_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_BITSET0_B32 - - Inst_SOP1__S_BITSET0_B32::~Inst_SOP1__S_BITSET0_B32() - { - } // ~Inst_SOP1__S_BITSET0_B32 - - // D.u[S0.u[4:0]] = 0. - void - Inst_SOP1__S_BITSET0_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst.setBit(bits(src.rawData(), 4, 0), 0); - - sdst.write(); - } - - Inst_SOP1__S_BITSET0_B64::Inst_SOP1__S_BITSET0_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_bitset0_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_BITSET0_B64 - - Inst_SOP1__S_BITSET0_B64::~Inst_SOP1__S_BITSET0_B64() - { - } // ~Inst_SOP1__S_BITSET0_B64 - - // D.u64[S0.u[5:0]] = 0. - void - Inst_SOP1__S_BITSET0_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst.setBit(bits(src.rawData(), 5, 0), 0); - - sdst.write(); - } - - Inst_SOP1__S_BITSET1_B32::Inst_SOP1__S_BITSET1_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_bitset1_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_BITSET1_B32 - - Inst_SOP1__S_BITSET1_B32::~Inst_SOP1__S_BITSET1_B32() - { - } // ~Inst_SOP1__S_BITSET1_B32 - - // D.u[S0.u[4:0]] = 1. 
void
Inst_SOP1__S_BITSET1_B32::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

    src.read();

    // Set the bit of SDST selected by the low 5 bits of the source.
    sdst.setBit(bits(src.rawData(), 4, 0), 1);

    sdst.write();
}

Inst_SOP1__S_BITSET1_B64::Inst_SOP1__S_BITSET1_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_bitset1_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_BITSET1_B64

Inst_SOP1__S_BITSET1_B64::~Inst_SOP1__S_BITSET1_B64()
{
} // ~Inst_SOP1__S_BITSET1_B64

// D.u64[S0.u[5:0]] = 1.
void
Inst_SOP1__S_BITSET1_B64::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

    src.read();

    sdst.setBit(bits(src.rawData(), 5, 0), 1);

    sdst.write();
}

Inst_SOP1__S_GETPC_B64::Inst_SOP1__S_GETPC_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_getpc_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_GETPC_B64

Inst_SOP1__S_GETPC_B64::~Inst_SOP1__S_GETPC_B64()
{
} // ~Inst_SOP1__S_GETPC_B64

// D.u64 = PC + 4.
// Destination receives the byte address of the next instruction.
void
Inst_SOP1__S_GETPC_B64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    Addr pc = wf->pc();
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

    // +4 skips over this (4-byte) instruction word.
    sdst = pc + 4;

    sdst.write();
}

Inst_SOP1__S_SETPC_B64::Inst_SOP1__S_SETPC_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_setpc_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_SETPC_B64

Inst_SOP1__S_SETPC_B64::~Inst_SOP1__S_SETPC_B64()
{
} // ~Inst_SOP1__S_SETPC_B64

// PC = S0.u64.
// S0.u64 is a byte address of the instruction to jump to.
void
Inst_SOP1__S_SETPC_B64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);

    src.read();

    // Unconditional indirect jump: the wavefront PC becomes the
    // byte address held in S0.
    wf->pc(src.rawData());
}

Inst_SOP1__S_SWAPPC_B64::Inst_SOP1__S_SWAPPC_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_swappc_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_SWAPPC_B64

Inst_SOP1__S_SWAPPC_B64::~Inst_SOP1__S_SWAPPC_B64()
{
} // ~Inst_SOP1__S_SWAPPC_B64

// D.u64 = PC + 4; PC = S0.u64.
// S0.u64 is a byte address of the instruction to jump to.
void
Inst_SOP1__S_SWAPPC_B64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    Addr pc = wf->pc();
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

    src.read();

    // Save the return address before redirecting the PC (call idiom).
    sdst = pc + 4;

    wf->pc(src.rawData());
    sdst.write();
}

Inst_SOP1__S_RFE_B64::Inst_SOP1__S_RFE_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_rfe_b64")
{
} // Inst_SOP1__S_RFE_B64

Inst_SOP1__S_RFE_B64::~Inst_SOP1__S_RFE_B64()
{
} // ~Inst_SOP1__S_RFE_B64

// Return from exception handler and continue.
// Not implemented in this model.
void
Inst_SOP1__S_RFE_B64::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
}

Inst_SOP1__S_AND_SAVEEXEC_B64::Inst_SOP1__S_AND_SAVEEXEC_B64(
    InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_and_saveexec_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_AND_SAVEEXEC_B64

Inst_SOP1__S_AND_SAVEEXEC_B64::~Inst_SOP1__S_AND_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_AND_SAVEEXEC_B64

// D.u64 = EXEC;
// EXEC = S0.u64 & EXEC;
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_AND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    // Save the old EXEC mask before modifying it.
    sdst = wf->execMask().to_ullong();
    wf->execMask() = src.rawData() & wf->execMask().to_ullong();
    scc = wf->execMask().any() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_OR_SAVEEXEC_B64::Inst_SOP1__S_OR_SAVEEXEC_B64(
    InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_or_saveexec_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_OR_SAVEEXEC_B64

Inst_SOP1__S_OR_SAVEEXEC_B64::~Inst_SOP1__S_OR_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_OR_SAVEEXEC_B64

// D.u64 = EXEC;
// EXEC = S0.u64 | EXEC;
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_OR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    sdst = wf->execMask().to_ullong();
    wf->execMask() = src.rawData() | wf->execMask().to_ullong();
    scc = wf->execMask().any() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_XOR_SAVEEXEC_B64::Inst_SOP1__S_XOR_SAVEEXEC_B64(
    InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_xor_saveexec_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_XOR_SAVEEXEC_B64

Inst_SOP1__S_XOR_SAVEEXEC_B64::~Inst_SOP1__S_XOR_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_XOR_SAVEEXEC_B64

// D.u64 = EXEC;
// EXEC = S0.u64 ^ EXEC;
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_XOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    sdst = wf->execMask().to_ullong();
    wf->execMask() = src.rawData() ^ wf->execMask().to_ullong();
    scc = wf->execMask().any() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_ANDN2_SAVEEXEC_B64::Inst_SOP1__S_ANDN2_SAVEEXEC_B64(
    InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_andn2_saveexec_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_ANDN2_SAVEEXEC_B64

Inst_SOP1__S_ANDN2_SAVEEXEC_B64::~Inst_SOP1__S_ANDN2_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_ANDN2_SAVEEXEC_B64

// D.u64 = EXEC;
// EXEC = S0.u64 & ~EXEC;
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_ANDN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    sdst = wf->execMask().to_ullong();
    // "&~" negates the *old* EXEC, per the andn2 semantics above.
    wf->execMask() = src.rawData() &~ wf->execMask().to_ullong();
    scc = wf->execMask().any() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_ORN2_SAVEEXEC_B64::Inst_SOP1__S_ORN2_SAVEEXEC_B64(
    InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_orn2_saveexec_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_ORN2_SAVEEXEC_B64

Inst_SOP1__S_ORN2_SAVEEXEC_B64::~Inst_SOP1__S_ORN2_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_ORN2_SAVEEXEC_B64

// D.u64 = EXEC;
// EXEC = S0.u64 | ~EXEC;
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_ORN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    sdst = wf->execMask().to_ullong();
    // "|~" negates the *old* EXEC, per the orn2 semantics above.
    wf->execMask() = src.rawData() |~ wf->execMask().to_ullong();
    scc = wf->execMask().any() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_NAND_SAVEEXEC_B64::Inst_SOP1__S_NAND_SAVEEXEC_B64(
    InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_nand_saveexec_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_NAND_SAVEEXEC_B64

Inst_SOP1__S_NAND_SAVEEXEC_B64::~Inst_SOP1__S_NAND_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_NAND_SAVEEXEC_B64

// D.u64 = EXEC;
// EXEC = ~(S0.u64 & EXEC);
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_NAND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    sdst = wf->execMask().to_ullong();
    wf->execMask() = ~(src.rawData() & wf->execMask().to_ullong());
    scc = wf->execMask().any() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_NOR_SAVEEXEC_B64::Inst_SOP1__S_NOR_SAVEEXEC_B64(
    InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_nor_saveexec_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_NOR_SAVEEXEC_B64

Inst_SOP1__S_NOR_SAVEEXEC_B64::~Inst_SOP1__S_NOR_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_NOR_SAVEEXEC_B64

// D.u64 = EXEC;
// EXEC = ~(S0.u64 | EXEC);
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_NOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    sdst = wf->execMask().to_ullong();
    wf->execMask() = ~(src.rawData() | wf->execMask().to_ullong());
    scc = wf->execMask().any() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_XNOR_SAVEEXEC_B64::Inst_SOP1__S_XNOR_SAVEEXEC_B64(
    InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_xnor_saveexec_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_XNOR_SAVEEXEC_B64

Inst_SOP1__S_XNOR_SAVEEXEC_B64::~Inst_SOP1__S_XNOR_SAVEEXEC_B64()
{
} // ~Inst_SOP1__S_XNOR_SAVEEXEC_B64

// D.u64 = EXEC;
// EXEC = ~(S0.u64 ^ EXEC);
// SCC = 1 if the new value of EXEC is non-zero.
void
Inst_SOP1__S_XNOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    sdst = wf->execMask().to_ullong();
    wf->execMask() = ~(src.rawData() ^ wf->execMask().to_ullong());
    scc = wf->execMask().any() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_QUADMASK_B32::Inst_SOP1__S_QUADMASK_B32(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_quadmask_b32")
{
    setFlag(ALU);
} // Inst_SOP1__S_QUADMASK_B32

Inst_SOP1__S_QUADMASK_B32::~Inst_SOP1__S_QUADMASK_B32()
{
} // ~Inst_SOP1__S_QUADMASK_B32

// D.u = QuadMask(S0.u):
// D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[31:8] = 0;
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_QUADMASK_B32::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    // quadMask() is the shared helper collapsing each 4-bit group to 1.
    sdst = quadMask(src.rawData());
    scc = sdst.rawData() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_QUADMASK_B64::Inst_SOP1__S_QUADMASK_B64(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_quadmask_b64")
{
    setFlag(ALU);
} // Inst_SOP1__S_QUADMASK_B64

Inst_SOP1__S_QUADMASK_B64::~Inst_SOP1__S_QUADMASK_B64()
{
} // ~Inst_SOP1__S_QUADMASK_B64

// D.u64 = QuadMask(S0.u64):
// D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[63:16] = 0;
// SCC = 1 if result is non-zero.
void
Inst_SOP1__S_QUADMASK_B64::execute(GPUDynInstPtr gpuDynInst)
{
    ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
    ScalarOperandU32 scc(gpuDynInst, REG_SCC);

    src.read();

    sdst = quadMask(src.rawData());
    scc = sdst.rawData() ? 1 : 0;

    sdst.write();
    scc.write();
}

Inst_SOP1__S_MOVRELS_B32::Inst_SOP1__S_MOVRELS_B32(InFmt_SOP1 *iFmt)
    : Inst_SOP1(iFmt, "s_movrels_b32")
{
    setFlag(ALU);
} // Inst_SOP1__S_MOVRELS_B32

Inst_SOP1__S_MOVRELS_B32::~Inst_SOP1__S_MOVRELS_B32()
{
} // ~Inst_SOP1__S_MOVRELS_B32

// D.u = SGPR[S0.u + M0.u].u (move from relative source).
- void - Inst_SOP1__S_MOVRELS_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 m0(gpuDynInst, REG_M0); - m0.read(); - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0 + m0.rawData()); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = src.rawData(); - - sdst.write(); - } - - Inst_SOP1__S_MOVRELS_B64::Inst_SOP1__S_MOVRELS_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_movrels_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_MOVRELS_B64 - - Inst_SOP1__S_MOVRELS_B64::~Inst_SOP1__S_MOVRELS_B64() - { - } // ~Inst_SOP1__S_MOVRELS_B64 - - // D.u64 = SGPR[S0.u + M0.u].u64 (move from relative source). - // The index in M0.u must be even for this operation. - void - Inst_SOP1__S_MOVRELS_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 m0(gpuDynInst, REG_M0); - m0.read(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0 + m0.rawData()); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = src.rawData(); - - sdst.write(); - } - - Inst_SOP1__S_MOVRELD_B32::Inst_SOP1__S_MOVRELD_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_movreld_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_MOVRELD_B32 - - Inst_SOP1__S_MOVRELD_B32::~Inst_SOP1__S_MOVRELD_B32() - { - } // ~Inst_SOP1__S_MOVRELD_B32 - - // SGPR[D.u + M0.u].u = S0.u (move to relative destination). - void - Inst_SOP1__S_MOVRELD_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 m0(gpuDynInst, REG_M0); - m0.read(); - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST + m0.rawData()); - - src.read(); - - sdst = src.rawData(); - - sdst.write(); - } - - Inst_SOP1__S_MOVRELD_B64::Inst_SOP1__S_MOVRELD_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_movreld_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_MOVRELD_B64 - - Inst_SOP1__S_MOVRELD_B64::~Inst_SOP1__S_MOVRELD_B64() - { - } // ~Inst_SOP1__S_MOVRELD_B64 - - // SGPR[D.u + M0.u].u64 = S0.u64 (move to relative destination). 
- // The index in M0.u must be even for this operation. - void - Inst_SOP1__S_MOVRELD_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 m0(gpuDynInst, REG_M0); - m0.read(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST + m0.rawData()); - - src.read(); - - sdst = src.rawData(); - - sdst.write(); - } - - Inst_SOP1__S_CBRANCH_JOIN::Inst_SOP1__S_CBRANCH_JOIN(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_cbranch_join") - { - setFlag(Branch); - } // Inst_SOP1__S_CBRANCH_JOIN - - Inst_SOP1__S_CBRANCH_JOIN::~Inst_SOP1__S_CBRANCH_JOIN() - { - } // ~Inst_SOP1__S_CBRANCH_JOIN - - // Conditional branch join point (end of conditional branch block). - void - Inst_SOP1__S_CBRANCH_JOIN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOP1__S_ABS_I32::Inst_SOP1__S_ABS_I32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_abs_i32") - { - setFlag(ALU); - } // Inst_SOP1__S_ABS_I32 - - Inst_SOP1__S_ABS_I32::~Inst_SOP1__S_ABS_I32() - { - } // ~Inst_SOP1__S_ABS_I32 - - // if (S.i < 0) then D.i = -S.i; - // else D.i = S.i; - // SCC = 1 if result is non-zero. - // Integer absolute value. - void - Inst_SOP1__S_ABS_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = std::abs(src.rawData()); - - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } - - Inst_SOP1__S_MOV_FED_B32::Inst_SOP1__S_MOV_FED_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_mov_fed_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_MOV_FED_B32 - - Inst_SOP1__S_MOV_FED_B32::~Inst_SOP1__S_MOV_FED_B32() - { - } // ~Inst_SOP1__S_MOV_FED_B32 - - // D.u = S0.u. 
- void - Inst_SOP1__S_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOP1__S_SET_GPR_IDX_IDX::Inst_SOP1__S_SET_GPR_IDX_IDX( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_set_gpr_idx_idx") - { - } // Inst_SOP1__S_SET_GPR_IDX_IDX - - Inst_SOP1__S_SET_GPR_IDX_IDX::~Inst_SOP1__S_SET_GPR_IDX_IDX() - { - } // ~Inst_SOP1__S_SET_GPR_IDX_IDX - - // M0[7:0] = S0.u[7:0]. - // Modify the index used in vector GPR indexing. - void - Inst_SOP1__S_SET_GPR_IDX_IDX::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPC__S_CMP_EQ_I32::Inst_SOPC__S_CMP_EQ_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_eq_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_EQ_I32 - - Inst_SOPC__S_CMP_EQ_I32::~Inst_SOPC__S_CMP_EQ_I32() - { - } // ~Inst_SOPC__S_CMP_EQ_I32 - - // SCC = (S0.i == S1.i). - void - Inst_SOPC__S_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() == src1.rawData()) ? 1 : 0; - - scc.write(); - } - - Inst_SOPC__S_CMP_LG_I32::Inst_SOPC__S_CMP_LG_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_lg_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LG_I32 - - Inst_SOPC__S_CMP_LG_I32::~Inst_SOPC__S_CMP_LG_I32() - { - } // ~Inst_SOPC__S_CMP_LG_I32 - - // SCC = (S0.i != S1.i). - void - Inst_SOPC__S_CMP_LG_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() != src1.rawData()) ? 
1 : 0; - - scc.write(); - } - - Inst_SOPC__S_CMP_GT_I32::Inst_SOPC__S_CMP_GT_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_gt_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_GT_I32 - - Inst_SOPC__S_CMP_GT_I32::~Inst_SOPC__S_CMP_GT_I32() - { - } // ~Inst_SOPC__S_CMP_GT_I32 - - // SCC = (S0.i > S1.i). - void - Inst_SOPC__S_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() > src1.rawData()) ? 1 : 0; - - scc.write(); - } - - Inst_SOPC__S_CMP_GE_I32::Inst_SOPC__S_CMP_GE_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_ge_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_GE_I32 - - Inst_SOPC__S_CMP_GE_I32::~Inst_SOPC__S_CMP_GE_I32() - { - } // ~Inst_SOPC__S_CMP_GE_I32 - - // SCC = (S0.i >= S1.i). - void - Inst_SOPC__S_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() >= src1.rawData()) ? 1 : 0; - - scc.write(); - } - - Inst_SOPC__S_CMP_LT_I32::Inst_SOPC__S_CMP_LT_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_lt_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LT_I32 - - Inst_SOPC__S_CMP_LT_I32::~Inst_SOPC__S_CMP_LT_I32() - { - } // ~Inst_SOPC__S_CMP_LT_I32 - - // SCC = (S0.i < S1.i). - void - Inst_SOPC__S_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() < src1.rawData()) ? 
1 : 0; - - scc.write(); - } - - Inst_SOPC__S_CMP_LE_I32::Inst_SOPC__S_CMP_LE_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_le_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LE_I32 - - Inst_SOPC__S_CMP_LE_I32::~Inst_SOPC__S_CMP_LE_I32() - { - } // ~Inst_SOPC__S_CMP_LE_I32 - - // SCC = (S0.i <= S1.i). - void - Inst_SOPC__S_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() <= src1.rawData()) ? 1 : 0; - - scc.write(); - } - - Inst_SOPC__S_CMP_EQ_U32::Inst_SOPC__S_CMP_EQ_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_eq_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_EQ_U32 - - Inst_SOPC__S_CMP_EQ_U32::~Inst_SOPC__S_CMP_EQ_U32() - { - } // ~Inst_SOPC__S_CMP_EQ_U32 - - // SCC = (S0.u == S1.u). - void - Inst_SOPC__S_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() == src1.rawData()) ? 1 : 0; - - scc.write(); - } - - Inst_SOPC__S_CMP_LG_U32::Inst_SOPC__S_CMP_LG_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_lg_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LG_U32 - - Inst_SOPC__S_CMP_LG_U32::~Inst_SOPC__S_CMP_LG_U32() - { - } // ~Inst_SOPC__S_CMP_LG_U32 - - // SCC = (S0.u != S1.u). - void - Inst_SOPC__S_CMP_LG_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() != src1.rawData()) ? 
1 : 0; - - scc.write(); - } - - Inst_SOPC__S_CMP_GT_U32::Inst_SOPC__S_CMP_GT_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_gt_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_GT_U32 - - Inst_SOPC__S_CMP_GT_U32::~Inst_SOPC__S_CMP_GT_U32() - { - } // ~Inst_SOPC__S_CMP_GT_U32 - - // SCC = (S0.u > S1.u). - void - Inst_SOPC__S_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() > src1.rawData()) ? 1 : 0; - - scc.write(); - } - - Inst_SOPC__S_CMP_GE_U32::Inst_SOPC__S_CMP_GE_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_ge_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_GE_U32 - - Inst_SOPC__S_CMP_GE_U32::~Inst_SOPC__S_CMP_GE_U32() - { - } // ~Inst_SOPC__S_CMP_GE_U32 - - // SCC = (S0.u >= S1.u). - void - Inst_SOPC__S_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() >= src1.rawData()) ? 1 : 0; - - scc.write(); - } - - Inst_SOPC__S_CMP_LT_U32::Inst_SOPC__S_CMP_LT_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_lt_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LT_U32 - - Inst_SOPC__S_CMP_LT_U32::~Inst_SOPC__S_CMP_LT_U32() - { - } // ~Inst_SOPC__S_CMP_LT_U32 - - // SCC = (S0.u < S1.u). - void - Inst_SOPC__S_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() < src1.rawData()) ? 
1 : 0; - - scc.write(); - } - - Inst_SOPC__S_CMP_LE_U32::Inst_SOPC__S_CMP_LE_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_le_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LE_U32 - - Inst_SOPC__S_CMP_LE_U32::~Inst_SOPC__S_CMP_LE_U32() - { - } // ~Inst_SOPC__S_CMP_LE_U32 - - // SCC = (S0.u <= S1.u). - void - Inst_SOPC__S_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() <= src1.rawData()) ? 1 : 0; - - scc.write(); - } - - Inst_SOPC__S_BITCMP0_B32::Inst_SOPC__S_BITCMP0_B32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_bitcmp0_b32") - { - setFlag(ALU); - } // Inst_SOPC__S_BITCMP0_B32 - - Inst_SOPC__S_BITCMP0_B32::~Inst_SOPC__S_BITCMP0_B32() - { - } // ~Inst_SOPC__S_BITCMP0_B32 - - // SCC = (S0.u[S1.u[4:0]] == 0). - void - Inst_SOPC__S_BITCMP0_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = !bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0; - - scc.write(); - } - - Inst_SOPC__S_BITCMP1_B32::Inst_SOPC__S_BITCMP1_B32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_bitcmp1_b32") - { - setFlag(ALU); - } // Inst_SOPC__S_BITCMP1_B32 - - Inst_SOPC__S_BITCMP1_B32::~Inst_SOPC__S_BITCMP1_B32() - { - } // ~Inst_SOPC__S_BITCMP1_B32 - - // SCC = (S0.u[S1.u[4:0]] == 1). - void - Inst_SOPC__S_BITCMP1_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 
1 : 0; - - scc.write(); - } - - Inst_SOPC__S_BITCMP0_B64::Inst_SOPC__S_BITCMP0_B64(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_bitcmp0_b64") - { - setFlag(ALU); - } // Inst_SOPC__S_BITCMP0_B64 - - Inst_SOPC__S_BITCMP0_B64::~Inst_SOPC__S_BITCMP0_B64() - { - } // ~Inst_SOPC__S_BITCMP0_B64 - - // SCC = (S0.u64[S1.u[5:0]] == 0). - void - Inst_SOPC__S_BITCMP0_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = !bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0; - - scc.write(); - } - - Inst_SOPC__S_BITCMP1_B64::Inst_SOPC__S_BITCMP1_B64(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_bitcmp1_b64") - { - setFlag(ALU); - } // Inst_SOPC__S_BITCMP1_B64 - - Inst_SOPC__S_BITCMP1_B64::~Inst_SOPC__S_BITCMP1_B64() - { - } // ~Inst_SOPC__S_BITCMP1_B64 - - // SCC = (S0.u64[S1.u[5:0]] == 1). - void - Inst_SOPC__S_BITCMP1_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0; - - scc.write(); - } - - Inst_SOPC__S_SETVSKIP::Inst_SOPC__S_SETVSKIP(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_setvskip") - { - setFlag(UnconditionalJump); - } // Inst_SOPC__S_SETVSKIP - - Inst_SOPC__S_SETVSKIP::~Inst_SOPC__S_SETVSKIP() - { - } // ~Inst_SOPC__S_SETVSKIP - - // VSKIP = S0.u[S1.u[4:0]]. - // Enables and disables VSKIP mode. - // When VSKIP is enabled, no VOP*/M*BUF/MIMG/DS/FLAT/EXP instuctions are - // issued. 
- void - Inst_SOPC__S_SETVSKIP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPC__S_SET_GPR_IDX_ON::Inst_SOPC__S_SET_GPR_IDX_ON(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_set_gpr_idx_on") - { - } // Inst_SOPC__S_SET_GPR_IDX_ON - - Inst_SOPC__S_SET_GPR_IDX_ON::~Inst_SOPC__S_SET_GPR_IDX_ON() - { - } // ~Inst_SOPC__S_SET_GPR_IDX_ON - - // MODE.gpr_idx_en = 1; - // M0[7:0] = S0.u[7:0]; - // M0[15:12] = SIMM4 (direct contents of S1 field); - // Remaining bits of M0 are unmodified. - // Enable GPR indexing mode. Vector operations after this will perform - // relative GPR addressing based on the contents of M0. - // The raw contents of the S1 field are read and used to set the enable - // bits. S1[0] = VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and - // S1[3] = VDST_REL. - void - Inst_SOPC__S_SET_GPR_IDX_ON::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPC__S_CMP_EQ_U64::Inst_SOPC__S_CMP_EQ_U64(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_eq_u64") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_EQ_U64 - - Inst_SOPC__S_CMP_EQ_U64::~Inst_SOPC__S_CMP_EQ_U64() - { - } // ~Inst_SOPC__S_CMP_EQ_U64 - - // SCC = (S0.i64 == S1.i64). - void - Inst_SOPC__S_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() == src1.rawData()) ? 1 : 0; - - scc.write(); - } - - Inst_SOPC__S_CMP_LG_U64::Inst_SOPC__S_CMP_LG_U64(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_lg_u64") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LG_U64 - - Inst_SOPC__S_CMP_LG_U64::~Inst_SOPC__S_CMP_LG_U64() - { - } // ~Inst_SOPC__S_CMP_LG_U64 - - // SCC = (S0.i64 != S1.i64). 
- void - Inst_SOPC__S_CMP_LG_U64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() != src1.rawData()) ? 1 : 0; - - scc.write(); - } - - Inst_SOPP__S_NOP::Inst_SOPP__S_NOP(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_nop") - { - setFlag(Nop); - } // Inst_SOPP__S_NOP - - Inst_SOPP__S_NOP::~Inst_SOPP__S_NOP() - { - } // ~Inst_SOPP__S_NOP - - // Do nothing. - void - Inst_SOPP__S_NOP::execute(GPUDynInstPtr gpuDynInst) - { - } - - Inst_SOPP__S_ENDPGM::Inst_SOPP__S_ENDPGM(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_endpgm") - { - setFlag(EndOfKernel); - } // Inst_SOPP__S_ENDPGM - - Inst_SOPP__S_ENDPGM::~Inst_SOPP__S_ENDPGM() - { - } // ~Inst_SOPP__S_ENDPGM - - // End of program; terminate wavefront. - void - Inst_SOPP__S_ENDPGM::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ComputeUnit *cu = gpuDynInst->computeUnit(); - - // delete extra instructions fetched for completed work-items - wf->instructionBuffer.erase(wf->instructionBuffer.begin() + 1, - wf->instructionBuffer.end()); - - if (wf->pendingFetch) { - wf->dropFetch = true; - } - - wf->computeUnit->fetchStage.fetchUnit(wf->simdId) - .flushBuf(wf->wfSlotId); - wf->setStatus(Wavefront::S_STOPPED); - - int refCount = wf->computeUnit->getLds() - .decreaseRefCounter(wf->dispatchId, wf->wgId); - - /** - * The parent WF of this instruction is exiting, therefore - * it should not participate in this barrier any longer. This - * prevents possible deadlock issues if WFs exit early. 
- */ - int bar_id = WFBarrier::InvalidID; - if (wf->hasBarrier()) { - assert(wf->getStatus() != Wavefront::S_BARRIER); - bar_id = wf->barrierId(); - assert(bar_id != WFBarrier::InvalidID); - wf->releaseBarrier(); - cu->decMaxBarrierCnt(bar_id); - DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Exiting the " - "program and decrementing max barrier count for " - "barrier Id%d. New max count: %d.\n", cu->cu_id, - wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id, - cu->maxBarrierCnt(bar_id)); - } - - DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n", - wf->computeUnit->cu_id, wf->wgId, refCount); - - wf->computeUnit->registerManager->freeRegisters(wf); - wf->computeUnit->stats.completedWfs++; - wf->computeUnit->activeWaves--; - - panic_if(wf->computeUnit->activeWaves < 0, "CU[%d] Active waves less " - "than zero\n", wf->computeUnit->cu_id); - - DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n", - wf->computeUnit->cu_id, wf->simdId, wf->wfSlotId, wf->wfDynId); - - for (int i = 0; i < wf->vecReads.size(); i++) { - if (wf->rawDist.find(i) != wf->rawDist.end()) { - wf->stats.readsPerWrite.sample(wf->vecReads.at(i)); - } - } - wf->vecReads.clear(); - wf->rawDist.clear(); - wf->lastInstExec = 0; - - if (!refCount) { - /** - * If all WFs have finished, and hence the WG has finished, - * then we can free up the barrier belonging to the parent - * WG, but only if we actually used a barrier (i.e., more - * than one WF in the WG). - */ - if (bar_id != WFBarrier::InvalidID) { - DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves are " - "now complete. Releasing barrier Id%d.\n", cu->cu_id, - wf->simdId, wf->wfSlotId, wf->wfDynId, - wf->barrierId()); - cu->releaseBarrier(bar_id); - } - - /** - * Last wavefront of the workgroup has executed return. If the - * workgroup is not the final one in the kernel, then simply - * retire it; however, if it is the final one (i.e., indicating - * the kernel end) then release operation is needed. 
- */ - - // check whether the workgroup is indicating the kernel end (i.e., - // the last workgroup in the kernel). - bool kernelEnd = - wf->computeUnit->shader->dispatcher().isReachingKernelEnd(wf); - // further check whether 'release @ kernel end' is needed - bool relNeeded = - wf->computeUnit->shader->impl_kern_end_rel; - - // if not a kernel end or no release needed, retire the workgroup - // directly - if (!kernelEnd || !relNeeded) { - wf->computeUnit->shader->dispatcher().notifyWgCompl(wf); - wf->setStatus(Wavefront::S_STOPPED); - wf->computeUnit->stats.completedWGs++; - - return; - } - - /** - * If a kernel end and release needed, inject a memory sync and - * retire the workgroup after receving all acks. - */ - setFlag(MemSync); - setFlag(GlobalSegment); - // Notify Memory System of Kernel Completion - wf->setStatus(Wavefront::S_RETURNING); - gpuDynInst->simdId = wf->simdId; - gpuDynInst->wfSlotId = wf->wfSlotId; - gpuDynInst->wfDynId = wf->wfDynId; - - DPRINTF(GPUExec, "inject global memory fence for CU%d: " - "WF[%d][%d][%d]\n", wf->computeUnit->cu_id, - wf->simdId, wf->wfSlotId, wf->wfDynId); - - // call shader to prepare the flush operations - wf->computeUnit->shader->prepareFlush(gpuDynInst); - - wf->computeUnit->stats.completedWGs++; - } else { - wf->computeUnit->shader->dispatcher().scheduleDispatch(); - } - } - - - Inst_SOPP__S_BRANCH::Inst_SOPP__S_BRANCH(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_branch") - { - setFlag(Branch); - } // Inst_SOPP__S_BRANCH - - Inst_SOPP__S_BRANCH::~Inst_SOPP__S_BRANCH() - { - } // ~Inst_SOPP__S_BRANCH - - // PC = PC + signext(SIMM16 * 4) + 4 (short jump). 
- void - Inst_SOPP__S_BRANCH::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - Addr pc = wf->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - - pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; - - wf->pc(pc); - } - - Inst_SOPP__S_WAKEUP::Inst_SOPP__S_WAKEUP(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_wakeup") - { - } // Inst_SOPP__S_WAKEUP - - Inst_SOPP__S_WAKEUP::~Inst_SOPP__S_WAKEUP() - { - } // ~Inst_SOPP__S_WAKEUP - - // Allow a wave to wakeup all the other waves in its workgroup to force - // them to wake up immediately from an S_SLEEP instruction. The wakeup is - // ignored if the waves are not sleeping. - void - Inst_SOPP__S_WAKEUP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_CBRANCH_SCC0::Inst_SOPP__S_CBRANCH_SCC0(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_scc0") - { - setFlag(Branch); - } // Inst_SOPP__S_CBRANCH_SCC0 - - Inst_SOPP__S_CBRANCH_SCC0::~Inst_SOPP__S_CBRANCH_SCC0() - { - } // ~Inst_SOPP__S_CBRANCH_SCC0 - - // if (SCC == 0) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. - void - Inst_SOPP__S_CBRANCH_SCC0::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - Addr pc = wf->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); - - scc.read(); - - if (!scc.rawData()) { - pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; - } - - wf->pc(pc); - } - - Inst_SOPP__S_CBRANCH_SCC1::Inst_SOPP__S_CBRANCH_SCC1(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_scc1") - { - setFlag(Branch); - } // Inst_SOPP__S_CBRANCH_SCC1 - - Inst_SOPP__S_CBRANCH_SCC1::~Inst_SOPP__S_CBRANCH_SCC1() - { - } // ~Inst_SOPP__S_CBRANCH_SCC1 - - // if (SCC == 1) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. 
- void - Inst_SOPP__S_CBRANCH_SCC1::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - Addr pc = wf->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); - - scc.read(); - - if (scc.rawData()) { - pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; - } - - wf->pc(pc); - } - - Inst_SOPP__S_CBRANCH_VCCZ::Inst_SOPP__S_CBRANCH_VCCZ(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_vccz") - { - setFlag(Branch); - setFlag(ReadsVCC); - } // Inst_SOPP__S_CBRANCH_VCCZ - - Inst_SOPP__S_CBRANCH_VCCZ::~Inst_SOPP__S_CBRANCH_VCCZ() - { - } // ~Inst_SOPP__S_CBRANCH_VCCZ - - // if (VCC == 0) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. - void - Inst_SOPP__S_CBRANCH_VCCZ::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - Addr pc = wf->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - - vcc.read(); - - if (!vcc.rawData()) { - pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; - } - - wf->pc(pc); - } - - Inst_SOPP__S_CBRANCH_VCCNZ::Inst_SOPP__S_CBRANCH_VCCNZ(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_vccnz") - { - setFlag(Branch); - setFlag(ReadsVCC); - } // Inst_SOPP__S_CBRANCH_VCCNZ - - Inst_SOPP__S_CBRANCH_VCCNZ::~Inst_SOPP__S_CBRANCH_VCCNZ() - { - } // ~Inst_SOPP__S_CBRANCH_VCCNZ - - // if (VCC != 0) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. 
- void - Inst_SOPP__S_CBRANCH_VCCNZ::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - vcc.read(); - - if (vcc.rawData()) { - Addr pc = wf->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; - wf->pc(pc); - } - } - - Inst_SOPP__S_CBRANCH_EXECZ::Inst_SOPP__S_CBRANCH_EXECZ(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_execz") - { - setFlag(Branch); - } // Inst_SOPP__S_CBRANCH_EXECZ - - Inst_SOPP__S_CBRANCH_EXECZ::~Inst_SOPP__S_CBRANCH_EXECZ() - { - } // ~Inst_SOPP__S_CBRANCH_EXECZ - - // if (EXEC == 0) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. - void - Inst_SOPP__S_CBRANCH_EXECZ::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (wf->execMask().none()) { - Addr pc = wf->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; - wf->pc(pc); - } - } - - Inst_SOPP__S_CBRANCH_EXECNZ::Inst_SOPP__S_CBRANCH_EXECNZ(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_execnz") - { - setFlag(Branch); - } // Inst_SOPP__S_CBRANCH_EXECNZ - - Inst_SOPP__S_CBRANCH_EXECNZ::~Inst_SOPP__S_CBRANCH_EXECNZ() - { - } // ~Inst_SOPP__S_CBRANCH_EXECNZ - - // if (EXEC != 0) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. - void - Inst_SOPP__S_CBRANCH_EXECNZ::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (wf->execMask().any()) { - Addr pc = wf->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL; - wf->pc(pc); - } - } - - Inst_SOPP__S_BARRIER::Inst_SOPP__S_BARRIER(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_barrier") - { - setFlag(MemBarrier); - } // Inst_SOPP__S_BARRIER - - Inst_SOPP__S_BARRIER::~Inst_SOPP__S_BARRIER() - { - } // ~Inst_SOPP__S_BARRIER - - /** - * Synchronize waves within a workgroup. 
If not all waves of the workgroup - * have been created yet, wait for entire group before proceeding. If some - * waves in the wokgroup have already terminated, this waits on only the - * surviving waves. - */ - void - Inst_SOPP__S_BARRIER::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ComputeUnit *cu = gpuDynInst->computeUnit(); - - if (wf->hasBarrier()) { - int bar_id = wf->barrierId(); - cu->incNumAtBarrier(bar_id); - DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalling at " - "barrier Id%d. %d waves now at barrier, %d waves " - "remain.\n", cu->cu_id, wf->simdId, wf->wfSlotId, - wf->wfDynId, bar_id, cu->numAtBarrier(bar_id), - cu->numYetToReachBarrier(bar_id)); - } - } // execute - // --- Inst_SOPP__S_SETKILL class methods --- - - Inst_SOPP__S_SETKILL::Inst_SOPP__S_SETKILL(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_setkill") - { - } // Inst_SOPP__S_SETKILL - - Inst_SOPP__S_SETKILL::~Inst_SOPP__S_SETKILL() - { - } // ~Inst_SOPP__S_SETKILL - - void - Inst_SOPP__S_SETKILL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_WAITCNT::Inst_SOPP__S_WAITCNT(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_waitcnt") - { - setFlag(ALU); - setFlag(Waitcnt); - } // Inst_SOPP__S_WAITCNT - - Inst_SOPP__S_WAITCNT::~Inst_SOPP__S_WAITCNT() - { - } // ~Inst_SOPP__S_WAITCNT - - // Wait for the counts of outstanding lds, vector-memory and - // export/vmem-write-data to be at or below the specified levels. - // SIMM16[3:0] = vmcount (vector memory operations), - // SIMM16[6:4] = export/mem-write-data count, - // SIMM16[12:8] = LGKM_cnt (scalar-mem/GDS/LDS count). 
- void - Inst_SOPP__S_WAITCNT::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 vm_cnt = 0; - ScalarRegI32 exp_cnt = 0; - ScalarRegI32 lgkm_cnt = 0; - vm_cnt = bits(instData.SIMM16, 3, 0); - exp_cnt = bits(instData.SIMM16, 6, 4); - lgkm_cnt = bits(instData.SIMM16, 12, 8); - gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt); - } - - Inst_SOPP__S_SETHALT::Inst_SOPP__S_SETHALT(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_sethalt") - { - } // Inst_SOPP__S_SETHALT - - Inst_SOPP__S_SETHALT::~Inst_SOPP__S_SETHALT() - { - } // ~Inst_SOPP__S_SETHALT - - void - Inst_SOPP__S_SETHALT::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_SLEEP::Inst_SOPP__S_SLEEP(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_sleep") - { - setFlag(ALU); - setFlag(Sleep); - } // Inst_SOPP__S_SLEEP - - Inst_SOPP__S_SLEEP::~Inst_SOPP__S_SLEEP() - { - } // ~Inst_SOPP__S_SLEEP - - // Cause a wave to sleep for (64 * SIMM16[2:0] + 1..64) clocks. - void - Inst_SOPP__S_SLEEP::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16; - gpuDynInst->wavefront()->setStatus(Wavefront::S_STALLED_SLEEP); - // sleep duration is specified in multiples of 64 cycles - gpuDynInst->wavefront()->setSleepTime(64 * simm16); - } // execute - // --- Inst_SOPP__S_SETPRIO class methods --- - - Inst_SOPP__S_SETPRIO::Inst_SOPP__S_SETPRIO(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_setprio") - { - } // Inst_SOPP__S_SETPRIO - - Inst_SOPP__S_SETPRIO::~Inst_SOPP__S_SETPRIO() - { - } // ~Inst_SOPP__S_SETPRIO - - // User settable wave priority is set to SIMM16[1:0]. 0 = lowest, - // 3 = highest. 
- void - Inst_SOPP__S_SETPRIO::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_SENDMSG::Inst_SOPP__S_SENDMSG(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_sendmsg") - { - } // Inst_SOPP__S_SENDMSG - - Inst_SOPP__S_SENDMSG::~Inst_SOPP__S_SENDMSG() - { - } // ~Inst_SOPP__S_SENDMSG - - void - Inst_SOPP__S_SENDMSG::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_SENDMSGHALT::Inst_SOPP__S_SENDMSGHALT(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_sendmsghalt") - { - } // Inst_SOPP__S_SENDMSGHALT - - Inst_SOPP__S_SENDMSGHALT::~Inst_SOPP__S_SENDMSGHALT() - { - } // ~Inst_SOPP__S_SENDMSGHALT - - void - Inst_SOPP__S_SENDMSGHALT::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_TRAP::Inst_SOPP__S_TRAP(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_trap") - { - } // Inst_SOPP__S_TRAP - - Inst_SOPP__S_TRAP::~Inst_SOPP__S_TRAP() - { - } // ~Inst_SOPP__S_TRAP - - // Enter the trap handler. - void - Inst_SOPP__S_TRAP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_ICACHE_INV::Inst_SOPP__S_ICACHE_INV(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_icache_inv") - { - } // Inst_SOPP__S_ICACHE_INV - - Inst_SOPP__S_ICACHE_INV::~Inst_SOPP__S_ICACHE_INV() - { - } // ~Inst_SOPP__S_ICACHE_INV - - // Invalidate entire L1 instruction cache. 
- void - Inst_SOPP__S_ICACHE_INV::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_INCPERFLEVEL::Inst_SOPP__S_INCPERFLEVEL(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_incperflevel") - { - } // Inst_SOPP__S_INCPERFLEVEL - - Inst_SOPP__S_INCPERFLEVEL::~Inst_SOPP__S_INCPERFLEVEL() - { - } // ~Inst_SOPP__S_INCPERFLEVEL - - void - Inst_SOPP__S_INCPERFLEVEL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_DECPERFLEVEL::Inst_SOPP__S_DECPERFLEVEL(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_decperflevel") - { - } // Inst_SOPP__S_DECPERFLEVEL - - Inst_SOPP__S_DECPERFLEVEL::~Inst_SOPP__S_DECPERFLEVEL() - { - } // ~Inst_SOPP__S_DECPERFLEVEL - - void - Inst_SOPP__S_DECPERFLEVEL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_TTRACEDATA::Inst_SOPP__S_TTRACEDATA(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_ttracedata") - { - } // Inst_SOPP__S_TTRACEDATA - - Inst_SOPP__S_TTRACEDATA::~Inst_SOPP__S_TTRACEDATA() - { - } // ~Inst_SOPP__S_TTRACEDATA - - void - Inst_SOPP__S_TTRACEDATA::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_CBRANCH_CDBGSYS::Inst_SOPP__S_CBRANCH_CDBGSYS( - InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_cdbgsys") - { - setFlag(Branch); - } // Inst_SOPP__S_CBRANCH_CDBGSYS - - Inst_SOPP__S_CBRANCH_CDBGSYS::~Inst_SOPP__S_CBRANCH_CDBGSYS() - { - } // ~Inst_SOPP__S_CBRANCH_CDBGSYS - - void - Inst_SOPP__S_CBRANCH_CDBGSYS::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_CBRANCH_CDBGUSER::Inst_SOPP__S_CBRANCH_CDBGUSER( - InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_cdbguser") - { - setFlag(Branch); - } // Inst_SOPP__S_CBRANCH_CDBGUSER - - Inst_SOPP__S_CBRANCH_CDBGUSER::~Inst_SOPP__S_CBRANCH_CDBGUSER() - { - } // ~Inst_SOPP__S_CBRANCH_CDBGUSER - - void - Inst_SOPP__S_CBRANCH_CDBGUSER::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - 
Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER( - InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_or_user") - { - setFlag(Branch); - } // Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER - - Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER:: - ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER() - { - } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER - - void - Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER:: - Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_and_user") - { - setFlag(Branch); - } // Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER - - Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER:: - ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER() - { - } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER - - void - Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_ENDPGM_SAVED::Inst_SOPP__S_ENDPGM_SAVED(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_endpgm_saved") - { - } // Inst_SOPP__S_ENDPGM_SAVED - - Inst_SOPP__S_ENDPGM_SAVED::~Inst_SOPP__S_ENDPGM_SAVED() - { - } // ~Inst_SOPP__S_ENDPGM_SAVED - - // End of program. - void - Inst_SOPP__S_ENDPGM_SAVED::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_SET_GPR_IDX_OFF::Inst_SOPP__S_SET_GPR_IDX_OFF( - InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_set_gpr_idx_off") - { - } // Inst_SOPP__S_SET_GPR_IDX_OFF - - Inst_SOPP__S_SET_GPR_IDX_OFF::~Inst_SOPP__S_SET_GPR_IDX_OFF() - { - } // ~Inst_SOPP__S_SET_GPR_IDX_OFF - - // MODE.gpr_idx_en = 0. - // Clear GPR indexing mode. Vector operations after this will not perform - // relative GPR addressing regardless of the contents of M0. 
- void - Inst_SOPP__S_SET_GPR_IDX_OFF::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SOPP__S_SET_GPR_IDX_MODE::Inst_SOPP__S_SET_GPR_IDX_MODE( - InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_set_gpr_idx_mode") - { - } // Inst_SOPP__S_SET_GPR_IDX_MODE - - Inst_SOPP__S_SET_GPR_IDX_MODE::~Inst_SOPP__S_SET_GPR_IDX_MODE() - { - } // ~Inst_SOPP__S_SET_GPR_IDX_MODE - - // M0[15:12] = SIMM4. - // Modify the mode used for vector GPR indexing. - // The raw contents of the source field are read and used to set the enable - // bits. SIMM4[0] = VSRC0_REL, SIMM4[1] = VSRC1_REL, SIMM4[2] = VSRC2_REL - // and SIMM4[3] = VDST_REL. - void - Inst_SOPP__S_SET_GPR_IDX_MODE::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SMEM__S_LOAD_DWORD::Inst_SMEM__S_LOAD_DWORD(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_load_dword") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_LOAD_DWORD - - Inst_SMEM__S_LOAD_DWORD::~Inst_SMEM__S_LOAD_DWORD() - { - } // ~Inst_SMEM__S_LOAD_DWORD - - /** - * Read 1 dword from scalar data cache. If the offset is specified as an - * sgpr, the sgpr contains an unsigned byte offset (the 2 LSBs are - * ignored). If the offset is specified as an immediate 20-bit constant, - * the constant is an unsigned byte offset. 
- */ - void - Inst_SMEM__S_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - - addr.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe - .issueRequest(gpuDynInst); - } - - void - Inst_SMEM__S_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<1>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - ScalarOperandU32 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - - Inst_SMEM__S_LOAD_DWORDX2::Inst_SMEM__S_LOAD_DWORDX2(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_load_dwordx2") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_LOAD_DWORDX2 - - Inst_SMEM__S_LOAD_DWORDX2::~Inst_SMEM__S_LOAD_DWORDX2() - { - } // ~Inst_SMEM__S_LOAD_DWORDX2 - - /** - * Read 2 dwords from scalar data cache. See s_load_dword for details on - * the offset input. 
- */ - void - Inst_SMEM__S_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - - addr.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe. - issueRequest(gpuDynInst); - } - - void - Inst_SMEM__S_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<2>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - ScalarOperandU64 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - - Inst_SMEM__S_LOAD_DWORDX4::Inst_SMEM__S_LOAD_DWORDX4(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_load_dwordx4") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_LOAD_DWORDX4 - - Inst_SMEM__S_LOAD_DWORDX4::~Inst_SMEM__S_LOAD_DWORDX4() - { - } // ~Inst_SMEM__S_LOAD_DWORDX4 - - // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on - // the offset input. 
- void - Inst_SMEM__S_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - - addr.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe. - issueRequest(gpuDynInst); - } - - void - Inst_SMEM__S_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<4>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - ScalarOperandU128 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - - Inst_SMEM__S_LOAD_DWORDX8::Inst_SMEM__S_LOAD_DWORDX8(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_load_dwordx8") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_LOAD_DWORDX8 - - Inst_SMEM__S_LOAD_DWORDX8::~Inst_SMEM__S_LOAD_DWORDX8() - { - } // ~Inst_SMEM__S_LOAD_DWORDX8 - - // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on - // the offset input. 
- void - Inst_SMEM__S_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - - addr.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe. - issueRequest(gpuDynInst); - } - - void - Inst_SMEM__S_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<8>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst) - { - ScalarOperandU256 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - - Inst_SMEM__S_LOAD_DWORDX16::Inst_SMEM__S_LOAD_DWORDX16(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_load_dwordx16") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_LOAD_DWORDX16 - - Inst_SMEM__S_LOAD_DWORDX16::~Inst_SMEM__S_LOAD_DWORDX16() - { - } // ~Inst_SMEM__S_LOAD_DWORDX16 - - // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on - // the offset input. 
- void - Inst_SMEM__S_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - - addr.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe. - issueRequest(gpuDynInst); - } - - void - Inst_SMEM__S_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<16>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst) - { - ScalarOperandU512 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - - Inst_SMEM__S_BUFFER_LOAD_DWORD::Inst_SMEM__S_BUFFER_LOAD_DWORD( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_load_dword") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_BUFFER_LOAD_DWORD - - Inst_SMEM__S_BUFFER_LOAD_DWORD::~Inst_SMEM__S_BUFFER_LOAD_DWORD() - { - } // ~Inst_SMEM__S_BUFFER_LOAD_DWORD - - // Read 1 dword from scalar data cache. See S_LOAD_DWORD for details on the - // offset input. 
- void - Inst_SMEM__S_BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); - - rsrcDesc.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, rsrcDesc, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe - .issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<1>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - // 1 request, size 32 - ScalarOperandU32 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - - Inst_SMEM__S_BUFFER_LOAD_DWORDX2::Inst_SMEM__S_BUFFER_LOAD_DWORDX2( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_load_dwordx2") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_BUFFER_LOAD_DWORDX2 - - Inst_SMEM__S_BUFFER_LOAD_DWORDX2::~Inst_SMEM__S_BUFFER_LOAD_DWORDX2() - { - } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX2 - - // Read 2 dwords from scalar data cache. See S_LOAD_DWORD for details on - // the offset input. 
- void - Inst_SMEM__S_BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); - - rsrcDesc.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, rsrcDesc, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe - .issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<2>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - // use U64 because 2 requests, each size 32 - ScalarOperandU64 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - - Inst_SMEM__S_BUFFER_LOAD_DWORDX4::Inst_SMEM__S_BUFFER_LOAD_DWORDX4( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_load_dwordx4") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_BUFFER_LOAD_DWORDX4 - - Inst_SMEM__S_BUFFER_LOAD_DWORDX4::~Inst_SMEM__S_BUFFER_LOAD_DWORDX4() - { - } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX4 - - // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on - // the offset input. 
- void - Inst_SMEM__S_BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); - - rsrcDesc.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, rsrcDesc, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe - .issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<4>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - // 4 requests, each size 32 - ScalarOperandU128 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - - Inst_SMEM__S_BUFFER_LOAD_DWORDX8::Inst_SMEM__S_BUFFER_LOAD_DWORDX8( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_load_dwordx8") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_BUFFER_LOAD_DWORDX8 - - Inst_SMEM__S_BUFFER_LOAD_DWORDX8::~Inst_SMEM__S_BUFFER_LOAD_DWORDX8() - { - } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX8 - - // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on - // the offset input. 
- void - Inst_SMEM__S_BUFFER_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); - - rsrcDesc.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, rsrcDesc, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe - .issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<8>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst) - { - // 8 requests, each size 32 - ScalarOperandU256 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - - Inst_SMEM__S_BUFFER_LOAD_DWORDX16::Inst_SMEM__S_BUFFER_LOAD_DWORDX16( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_load_dwordx16") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_SMEM__S_BUFFER_LOAD_DWORDX16 - - Inst_SMEM__S_BUFFER_LOAD_DWORDX16::~Inst_SMEM__S_BUFFER_LOAD_DWORDX16() - { - } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX16 - - // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on - // the offset input. 
- void - Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE); - - rsrcDesc.read(); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, rsrcDesc, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe - .issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<16>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst) - { - // 16 requests, each size 32 - ScalarOperandU512 sdst(gpuDynInst, instData.SDATA); - sdst.write(); - } // completeAcc - - Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_store_dword") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_SMEM__S_STORE_DWORD - - Inst_SMEM__S_STORE_DWORD::~Inst_SMEM__S_STORE_DWORD() - { - } // ~Inst_SMEM__S_STORE_DWORD - - // Write 1 dword to scalar data cache. - // If the offset is specified as an SGPR, the SGPR contains an unsigned - // BYTE offset (the 2 LSBs are ignored). - // If the offset is specified as an immediate 20-bit constant, the - // constant is an unsigned BYTE offset. 
- void - Inst_SMEM__S_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - ConstScalarOperandU32 sdata(gpuDynInst, instData.SDATA); - - addr.read(); - sdata.read(); - - std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), - sizeof(ScalarRegU32)); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe. - issueRequest(gpuDynInst); - } - - void - Inst_SMEM__S_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<1>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_SMEM__S_STORE_DWORDX2::Inst_SMEM__S_STORE_DWORDX2(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_store_dwordx2") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_SMEM__S_STORE_DWORDX2 - - Inst_SMEM__S_STORE_DWORDX2::~Inst_SMEM__S_STORE_DWORDX2() - { - } // ~Inst_SMEM__S_STORE_DWORDX2 - - // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on - // the offset input. 
- void - Inst_SMEM__S_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA); - - addr.read(); - sdata.read(); - - std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), - sizeof(ScalarRegU64)); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe. - issueRequest(gpuDynInst); - } - - void - Inst_SMEM__S_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<2>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_SMEM__S_STORE_DWORDX4::Inst_SMEM__S_STORE_DWORDX4(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_store_dwordx4") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_SMEM__S_STORE_DWORDX4 - - Inst_SMEM__S_STORE_DWORDX4::~Inst_SMEM__S_STORE_DWORDX4() - { - } // ~Inst_SMEM__S_STORE_DWORDX4 - - // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on - // the offset input. 
- void - Inst_SMEM__S_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - ConstScalarOperandU128 sdata(gpuDynInst, instData.SDATA); - - addr.read(); - sdata.read(); - - std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), - 4 * sizeof(ScalarRegU32)); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe. - issueRequest(gpuDynInst); - } - - void - Inst_SMEM__S_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<4>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_SMEM__S_BUFFER_STORE_DWORD::Inst_SMEM__S_BUFFER_STORE_DWORD( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_store_dword") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_SMEM__S_BUFFER_STORE_DWORD - - Inst_SMEM__S_BUFFER_STORE_DWORD::~Inst_SMEM__S_BUFFER_STORE_DWORD() - { - } // ~Inst_SMEM__S_BUFFER_STORE_DWORD - - // Write 1 dword to scalar data cache. See S_STORE_DWORD for details on the - // offset input. 
- void - Inst_SMEM__S_BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_SMEM__S_BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_SMEM__S_BUFFER_STORE_DWORDX2::Inst_SMEM__S_BUFFER_STORE_DWORDX2( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_store_dwordx2") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_SMEM__S_BUFFER_STORE_DWORDX2 - - Inst_SMEM__S_BUFFER_STORE_DWORDX2::~Inst_SMEM__S_BUFFER_STORE_DWORDX2() - { - } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX2 - - // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on - // the offset input. - void - Inst_SMEM__S_BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_SMEM__S_BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_SMEM__S_BUFFER_STORE_DWORDX4::Inst_SMEM__S_BUFFER_STORE_DWORDX4( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_store_dwordx4") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_SMEM__S_BUFFER_STORE_DWORDX4 - - Inst_SMEM__S_BUFFER_STORE_DWORDX4::~Inst_SMEM__S_BUFFER_STORE_DWORDX4() - { - } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX4 - - // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on - // the offset input. 
- void - Inst_SMEM__S_BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_SMEM__S_BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_SMEM__S_BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_SMEM__S_DCACHE_INV::Inst_SMEM__S_DCACHE_INV(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_dcache_inv") - { - } // Inst_SMEM__S_DCACHE_INV - - Inst_SMEM__S_DCACHE_INV::~Inst_SMEM__S_DCACHE_INV() - { - } // ~Inst_SMEM__S_DCACHE_INV - - // Invalidate the scalar data cache. - void - Inst_SMEM__S_DCACHE_INV::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SMEM__S_DCACHE_WB::Inst_SMEM__S_DCACHE_WB(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_dcache_wb") - { - } // Inst_SMEM__S_DCACHE_WB - - Inst_SMEM__S_DCACHE_WB::~Inst_SMEM__S_DCACHE_WB() - { - } // ~Inst_SMEM__S_DCACHE_WB - - // Write back dirty data in the scalar data cache. - void - Inst_SMEM__S_DCACHE_WB::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SMEM__S_DCACHE_INV_VOL::Inst_SMEM__S_DCACHE_INV_VOL(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_dcache_inv_vol") - { - } // Inst_SMEM__S_DCACHE_INV_VOL - - Inst_SMEM__S_DCACHE_INV_VOL::~Inst_SMEM__S_DCACHE_INV_VOL() - { - } // ~Inst_SMEM__S_DCACHE_INV_VOL - - // Invalidate the scalar data cache volatile lines. - void - Inst_SMEM__S_DCACHE_INV_VOL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SMEM__S_DCACHE_WB_VOL::Inst_SMEM__S_DCACHE_WB_VOL(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_dcache_wb_vol") - { - } // Inst_SMEM__S_DCACHE_WB_VOL - - Inst_SMEM__S_DCACHE_WB_VOL::~Inst_SMEM__S_DCACHE_WB_VOL() - { - } // ~Inst_SMEM__S_DCACHE_WB_VOL - - // Write back dirty data in the scalar data cache volatile lines. 
- void - Inst_SMEM__S_DCACHE_WB_VOL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SMEM__S_MEMTIME::Inst_SMEM__S_MEMTIME(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_memtime") - { - // s_memtime does not issue a memory request - setFlag(ALU); - } // Inst_SMEM__S_MEMTIME - - Inst_SMEM__S_MEMTIME::~Inst_SMEM__S_MEMTIME() - { - } // ~Inst_SMEM__S_MEMTIME - - // Return current 64-bit timestamp. - void - Inst_SMEM__S_MEMTIME::execute(GPUDynInstPtr gpuDynInst) - { - ScalarOperandU64 sdst(gpuDynInst, instData.SDATA); - sdst = (ScalarRegU64)gpuDynInst->computeUnit()->curCycle(); - sdst.write(); - } - - Inst_SMEM__S_MEMREALTIME::Inst_SMEM__S_MEMREALTIME(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_memrealtime") - { - } // Inst_SMEM__S_MEMREALTIME - - Inst_SMEM__S_MEMREALTIME::~Inst_SMEM__S_MEMREALTIME() - { - } // ~Inst_SMEM__S_MEMREALTIME - - // Return current 64-bit RTC. - void - Inst_SMEM__S_MEMREALTIME::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SMEM__S_ATC_PROBE::Inst_SMEM__S_ATC_PROBE(InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_atc_probe") - { - } // Inst_SMEM__S_ATC_PROBE - - Inst_SMEM__S_ATC_PROBE::~Inst_SMEM__S_ATC_PROBE() - { - } // ~Inst_SMEM__S_ATC_PROBE - - void - Inst_SMEM__S_ATC_PROBE::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_SMEM__S_ATC_PROBE_BUFFER::Inst_SMEM__S_ATC_PROBE_BUFFER( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_atc_probe_buffer") - { - } // Inst_SMEM__S_ATC_PROBE_BUFFER - - Inst_SMEM__S_ATC_PROBE_BUFFER::~Inst_SMEM__S_ATC_PROBE_BUFFER() - { - } // ~Inst_SMEM__S_ATC_PROBE_BUFFER - - void - Inst_SMEM__S_ATC_PROBE_BUFFER::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP2__V_CNDMASK_B32::Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_cndmask_b32") - { - setFlag(ALU); - setFlag(ReadsVCC); - } // Inst_VOP2__V_CNDMASK_B32 - - Inst_VOP2__V_CNDMASK_B32::~Inst_VOP2__V_CNDMASK_B32() - { - } // ~Inst_VOP2__V_CNDMASK_B32 - - // 
D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC - // as a scalar GPR in S2. - void - Inst_VOP2__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - vcc.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] - = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane]; - } - } - - vdst.write(); - } - - Inst_VOP2__V_ADD_F32::Inst_VOP2__V_ADD_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_add_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP2__V_ADD_F32 - - Inst_VOP2__V_ADD_F32::~Inst_VOP2__V_ADD_F32() - { - } // ~Inst_VOP2__V_ADD_F32 - - // D.f = S0.f + S1.f. - void - Inst_VOP2__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - VecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - if (isDPPInst()) { - VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); - src0_dpp.read(); - - DPRINTF(GCN3, "Handling V_ADD_F32 SRC DPP. 
SRC0: register v[%d], " - "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " - "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, " - "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, - extData.iFmt_VOP_DPP.DPP_CTRL, - extData.iFmt_VOP_DPP.SRC0_ABS, - extData.iFmt_VOP_DPP.SRC0_NEG, - extData.iFmt_VOP_DPP.SRC1_ABS, - extData.iFmt_VOP_DPP.SRC1_NEG, - extData.iFmt_VOP_DPP.BOUND_CTRL, - extData.iFmt_VOP_DPP.BANK_MASK, - extData.iFmt_VOP_DPP.ROW_MASK); - - processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0_dpp[lane] + src1[lane]; - } - } - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - } - } - } - - vdst.write(); - } - - Inst_VOP2__V_SUB_F32::Inst_VOP2__V_SUB_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_sub_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP2__V_SUB_F32 - - Inst_VOP2__V_SUB_F32::~Inst_VOP2__V_SUB_F32() - { - } // ~Inst_VOP2__V_SUB_F32 - - // D.f = S0.f - S1.f. - void - Inst_VOP2__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - } - } - - vdst.write(); - } - - Inst_VOP2__V_SUBREV_F32::Inst_VOP2__V_SUBREV_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subrev_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP2__V_SUBREV_F32 - - Inst_VOP2__V_SUBREV_F32::~Inst_VOP2__V_SUBREV_F32() - { - } // ~Inst_VOP2__V_SUBREV_F32 - - // D.f = S1.f - S0.f. 
- void - Inst_VOP2__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - } - } - - vdst.write(); - } - - Inst_VOP2__V_MUL_LEGACY_F32::Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_legacy_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP2__V_MUL_LEGACY_F32 - - Inst_VOP2__V_MUL_LEGACY_F32::~Inst_VOP2__V_MUL_LEGACY_F32() - { - } // ~Inst_VOP2__V_MUL_LEGACY_F32 - - // D.f = S0.f * S1.f - void - Inst_VOP2__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] * src1[lane]; - } - } - - vdst.write(); - } - - Inst_VOP2__V_MUL_F32::Inst_VOP2__V_MUL_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP2__V_MUL_F32 - - Inst_VOP2__V_MUL_F32::~Inst_VOP2__V_MUL_F32() - { - } // ~Inst_VOP2__V_MUL_F32 - - // D.f = S0.f * S1.f. 
- void - Inst_VOP2__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isnan(src0[lane]) || - std::isnan(src1[lane])) { - vdst[lane] = NAN; - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - !std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if (std::isinf(src0[lane]) && - !std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - } - } else if (std::isinf(src0[lane]) && - std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - } - } else { - vdst[lane] = src0[lane] * src1[lane]; - } - } - } - - vdst.write(); - } - - Inst_VOP2__V_MUL_I32_I24::Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_i32_i24") - { - setFlag(ALU); - } // Inst_VOP2__V_MUL_I32_I24 - - Inst_VOP2__V_MUL_I32_I24::~Inst_VOP2__V_MUL_I32_I24() - { - } // ~Inst_VOP2__V_MUL_I32_I24 - - // D.i = 
S0.i[23:0] * S1.i[23:0]. - void - Inst_VOP2__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = szext<24>(src0[lane]) * szext<24>(src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP2__V_MUL_HI_I32_I24::Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_hi_i32_i24") - { - setFlag(ALU); - } // Inst_VOP2__V_MUL_HI_I32_I24 - - Inst_VOP2__V_MUL_HI_I32_I24::~Inst_VOP2__V_MUL_HI_I32_I24() - { - } // ~Inst_VOP2__V_MUL_HI_I32_I24 - - // D.i = (S0.i[23:0] * S1.i[23:0]) >> 32. - void - Inst_VOP2__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI64 tmp_src0 = (VecElemI64)szext<24>(src0[lane]); - VecElemI64 tmp_src1 = (VecElemI64)szext<24>(src1[lane]); - - vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32); - } - } - - vdst.write(); - } - - Inst_VOP2__V_MUL_U32_U24::Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_u32_u24") - { - setFlag(ALU); - } // Inst_VOP2__V_MUL_U32_U24 - - Inst_VOP2__V_MUL_U32_U24::~Inst_VOP2__V_MUL_U32_U24() - { - } // ~Inst_VOP2__V_MUL_U32_U24 - - // D.u = S0.u[23:0] * S1.u[23:0]. 
- void - Inst_VOP2__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - VecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - if (isSDWAInst()) { - VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); - // use copies of original src0, src1, and dest during selecting - VecOperandU32 origSrc0_sdwa(gpuDynInst, - extData.iFmt_VOP_SDWA.SRC0); - VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); - VecOperandU32 origVdst(gpuDynInst, instData.VDST); - - src0_sdwa.read(); - origSrc0_sdwa.read(); - origSrc1.read(); - - DPRINTF(GCN3, "Handling V_MUL_U32_U24 SRC SDWA. SRC0: register " - "v[%d], DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: " - "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: " - "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", - extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, - extData.iFmt_VOP_SDWA.DST_UNUSED, - extData.iFmt_VOP_SDWA.CLAMP, - extData.iFmt_VOP_SDWA.SRC0_SEL, - extData.iFmt_VOP_SDWA.SRC0_SEXT, - extData.iFmt_VOP_SDWA.SRC0_NEG, - extData.iFmt_VOP_SDWA.SRC0_ABS, - extData.iFmt_VOP_SDWA.SRC1_SEL, - extData.iFmt_VOP_SDWA.SRC1_SEXT, - extData.iFmt_VOP_SDWA.SRC1_NEG, - extData.iFmt_VOP_SDWA.SRC1_ABS); - - processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, - src1, origSrc1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = bits(src0_sdwa[lane], 23, 0) * - bits(src1[lane], 23, 0); - origVdst[lane] = vdst[lane]; // keep copy consistent - } - } - - processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = bits(src0[lane], 23, 0) * - bits(src1[lane], 23, 0); - } - } - } - - - vdst.write(); - } - - 
Inst_VOP2__V_MUL_HI_U32_U24::Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_hi_u32_u24") - { - setFlag(ALU); - } // Inst_VOP2__V_MUL_HI_U32_U24 - - Inst_VOP2__V_MUL_HI_U32_U24::~Inst_VOP2__V_MUL_HI_U32_U24() - { - } // ~Inst_VOP2__V_MUL_HI_U32_U24 - - // D.i = (S0.u[23:0] * S1.u[23:0]) >> 32. - void - Inst_VOP2__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); - VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0); - vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32); - } - } - - vdst.write(); - } - - Inst_VOP2__V_MIN_F32::Inst_VOP2__V_MIN_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP2__V_MIN_F32 - - Inst_VOP2__V_MIN_F32::~Inst_VOP2__V_MIN_F32() - { - } // ~Inst_VOP2__V_MIN_F32 - - // D.f = (S0.f < S1.f ? S0.f : S1.f). - void - Inst_VOP2__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fmin(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP2__V_MAX_F32::Inst_VOP2__V_MAX_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP2__V_MAX_F32 - - Inst_VOP2__V_MAX_F32::~Inst_VOP2__V_MAX_F32() - { - } // ~Inst_VOP2__V_MAX_F32 - - // D.f = (S0.f >= S1.f ? S0.f : S1.f). 
- void - Inst_VOP2__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fmax(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP2__V_MIN_I32::Inst_VOP2__V_MIN_I32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_i32") - { - setFlag(ALU); - } // Inst_VOP2__V_MIN_I32 - - Inst_VOP2__V_MIN_I32::~Inst_VOP2__V_MIN_I32() - { - } // ~Inst_VOP2__V_MIN_I32 - - // D.i = min(S0.i, S1.i). - void - Inst_VOP2__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP2__V_MAX_I32::Inst_VOP2__V_MAX_I32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_i32") - { - setFlag(ALU); - } // Inst_VOP2__V_MAX_I32 - - Inst_VOP2__V_MAX_I32::~Inst_VOP2__V_MAX_I32() - { - } // ~Inst_VOP2__V_MAX_I32 - - // D.i = max(S0.i, S1.i). 
- void - Inst_VOP2__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP2__V_MIN_U32::Inst_VOP2__V_MIN_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_u32") - { - setFlag(ALU); - } // Inst_VOP2__V_MIN_U32 - - Inst_VOP2__V_MIN_U32::~Inst_VOP2__V_MIN_U32() - { - } // ~Inst_VOP2__V_MIN_U32 - - // D.u = min(S0.u, S1.u). - void - Inst_VOP2__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP2__V_MAX_U32::Inst_VOP2__V_MAX_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_u32") - { - setFlag(ALU); - } // Inst_VOP2__V_MAX_U32 - - Inst_VOP2__V_MAX_U32::~Inst_VOP2__V_MAX_U32() - { - } // ~Inst_VOP2__V_MAX_U32 - - // D.u = max(S0.u, S1.u). 
- void - Inst_VOP2__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP2__V_LSHRREV_B32::Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_lshrrev_b32") - { - setFlag(ALU); - } // Inst_VOP2__V_LSHRREV_B32 - - Inst_VOP2__V_LSHRREV_B32::~Inst_VOP2__V_LSHRREV_B32() - { - } // ~Inst_VOP2__V_LSHRREV_B32 - - // D.u = S1.u >> S0.u[4:0]. - // The vacated bits are set to zero. - void - Inst_VOP2__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); - } - } - - vdst.write(); - } - - Inst_VOP2__V_ASHRREV_I32::Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_ashrrev_i32") - { - setFlag(ALU); - } // Inst_VOP2__V_ASHRREV_I32 - - Inst_VOP2__V_ASHRREV_I32::~Inst_VOP2__V_ASHRREV_I32() - { - } // ~Inst_VOP2__V_ASHRREV_I32 - - // D.i = signext(S1.i) >> S0.i[4:0]. - // The vacated bits are set to the sign bit of the input value. 
- void - Inst_VOP2__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); - } - } - - vdst.write(); - } - - Inst_VOP2__V_LSHLREV_B32::Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_lshlrev_b32") - { - setFlag(ALU); - } // Inst_VOP2__V_LSHLREV_B32 - - Inst_VOP2__V_LSHLREV_B32::~Inst_VOP2__V_LSHLREV_B32() - { - } // ~Inst_VOP2__V_LSHLREV_B32 - - // D.u = S1.u << S0.u[4:0]. - void - Inst_VOP2__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - VecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - if (isSDWAInst()) { - VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); - // use copies of original src0, src1, and vdst during selecting - VecOperandU32 origSrc0_sdwa(gpuDynInst, - extData.iFmt_VOP_SDWA.SRC0); - VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); - VecOperandU32 origVdst(gpuDynInst, instData.VDST); - - src0_sdwa.read(); - origSrc0_sdwa.read(); - origSrc1.read(); - - DPRINTF(GCN3, "Handling V_LSHLREV_B32 SRC SDWA. 
SRC0: register " - "v[%d], DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: " - "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: " - "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", - extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, - extData.iFmt_VOP_SDWA.DST_UNUSED, - extData.iFmt_VOP_SDWA.CLAMP, - extData.iFmt_VOP_SDWA.SRC0_SEL, - extData.iFmt_VOP_SDWA.SRC0_SEXT, - extData.iFmt_VOP_SDWA.SRC0_NEG, - extData.iFmt_VOP_SDWA.SRC0_ABS, - extData.iFmt_VOP_SDWA.SRC1_SEL, - extData.iFmt_VOP_SDWA.SRC1_SEXT, - extData.iFmt_VOP_SDWA.SRC1_NEG, - extData.iFmt_VOP_SDWA.SRC1_ABS); - - processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, - src1, origSrc1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0); - origVdst[lane] = vdst[lane]; // keep copy consistent - } - } - - processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] << bits(src0[lane], 4, 0); - } - } - } - - vdst.write(); - } - - Inst_VOP2__V_AND_B32::Inst_VOP2__V_AND_B32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_and_b32") - { - setFlag(ALU); - } // Inst_VOP2__V_AND_B32 - - Inst_VOP2__V_AND_B32::~Inst_VOP2__V_AND_B32() - { - } // ~Inst_VOP2__V_AND_B32 - - // D.u = S0.u & S1.u. - // Input and output modifiers not supported. 
- void - Inst_VOP2__V_AND_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] & src1[lane]; - } - } - - vdst.write(); - } - - Inst_VOP2__V_OR_B32::Inst_VOP2__V_OR_B32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_or_b32") - { - setFlag(ALU); - } // Inst_VOP2__V_OR_B32 - - Inst_VOP2__V_OR_B32::~Inst_VOP2__V_OR_B32() - { - } // ~Inst_VOP2__V_OR_B32 - - // D.u = S0.u | S1.u. - // Input and output modifiers not supported. - void - Inst_VOP2__V_OR_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - VecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - if (isSDWAInst()) { - VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); - // use copies of original src0, src1, and dest during selecting - VecOperandU32 origSrc0_sdwa(gpuDynInst, - extData.iFmt_VOP_SDWA.SRC0); - VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); - VecOperandU32 origVdst(gpuDynInst, instData.VDST); - - src0_sdwa.read(); - origSrc0_sdwa.read(); - origSrc1.read(); - - DPRINTF(GCN3, "Handling V_OR_B32 SRC SDWA. 
SRC0: register v[%d], " - "DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: %d, " - "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, " - "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", - extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, - extData.iFmt_VOP_SDWA.DST_UNUSED, - extData.iFmt_VOP_SDWA.CLAMP, - extData.iFmt_VOP_SDWA.SRC0_SEL, - extData.iFmt_VOP_SDWA.SRC0_SEXT, - extData.iFmt_VOP_SDWA.SRC0_NEG, - extData.iFmt_VOP_SDWA.SRC0_ABS, - extData.iFmt_VOP_SDWA.SRC1_SEL, - extData.iFmt_VOP_SDWA.SRC1_SEXT, - extData.iFmt_VOP_SDWA.SRC1_NEG, - extData.iFmt_VOP_SDWA.SRC1_ABS); - - processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, - src1, origSrc1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0_sdwa[lane] | src1[lane]; - origVdst[lane] = vdst[lane]; // keep copy consistent - } - } - - processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] | src1[lane]; - } - } - } - - vdst.write(); - } - - Inst_VOP2__V_XOR_B32::Inst_VOP2__V_XOR_B32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_xor_b32") - { - setFlag(ALU); - } // Inst_VOP2__V_XOR_B32 - - Inst_VOP2__V_XOR_B32::~Inst_VOP2__V_XOR_B32() - { - } // ~Inst_VOP2__V_XOR_B32 - - // D.u = S0.u ^ S1.u. - // Input and output modifiers not supported. 
- void - Inst_VOP2__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] ^ src1[lane]; - } - } - - vdst.write(); - } - - Inst_VOP2__V_MAC_F32::Inst_VOP2__V_MAC_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mac_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(MAC); - } // Inst_VOP2__V_MAC_F32 - - Inst_VOP2__V_MAC_F32::~Inst_VOP2__V_MAC_F32() - { - } // ~Inst_VOP2__V_MAC_F32 - - // D.f = S0.f * S1.f + D.f. - void - Inst_VOP2__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - VecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - vdst.read(); - - if (isDPPInst()) { - VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); - src0_dpp.read(); - - DPRINTF(GCN3, "Handling V_MAC_F32 SRC DPP. 
SRC0: register v[%d], " - "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " - "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, " - "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, - extData.iFmt_VOP_DPP.DPP_CTRL, - extData.iFmt_VOP_DPP.SRC0_ABS, - extData.iFmt_VOP_DPP.SRC0_NEG, - extData.iFmt_VOP_DPP.SRC1_ABS, - extData.iFmt_VOP_DPP.SRC1_NEG, - extData.iFmt_VOP_DPP.BOUND_CTRL, - extData.iFmt_VOP_DPP.BANK_MASK, - extData.iFmt_VOP_DPP.ROW_MASK); - - processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0_dpp[lane], src1[lane], - vdst[lane]); - } - } - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]); - } - } - } - - vdst.write(); - } - - Inst_VOP2__V_MADMK_F32::Inst_VOP2__V_MADMK_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_madmk_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(MAD); - } // Inst_VOP2__V_MADMK_F32 - - Inst_VOP2__V_MADMK_F32::~Inst_VOP2__V_MADMK_F32() - { - } // ~Inst_VOP2__V_MADMK_F32 - - // D.f = S0.f * K + S1.f; K is a 32-bit inline constant. - // This opcode cannot use the input/output modifiers. 
- void - Inst_VOP2__V_MADMK_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - VecElemF32 k = extData.imm_f32; - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], k, src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP2__V_MADAK_F32::Inst_VOP2__V_MADAK_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_madak_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(MAD); - } // Inst_VOP2__V_MADAK_F32 - - Inst_VOP2__V_MADAK_F32::~Inst_VOP2__V_MADAK_F32() - { - } // ~Inst_VOP2__V_MADAK_F32 - - // D.f = S0.f * S1.f + K; K is a 32-bit inline constant. - // This opcode cannot use input/output modifiers. - void - Inst_VOP2__V_MADAK_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - VecElemF32 k = extData.imm_f32; - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], k); - } - } - - vdst.write(); - } - - Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_add_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - } // Inst_VOP2__V_ADD_U32 - - Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32() - { - } // ~Inst_VOP2__V_ADD_U32 - - // D.u = S0.u + S1.u; - // VCC[threadId] = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an UNSIGNED - // overflow or carry-out. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair. 
- void - Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - VecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - if (isSDWAInst()) { - VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); - // use copies of original src0, src1, and dest during selecting - VecOperandU32 origSrc0_sdwa(gpuDynInst, - extData.iFmt_VOP_SDWA.SRC0); - VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); - VecOperandU32 origVdst(gpuDynInst, instData.VDST); - - src0_sdwa.read(); - origSrc0_sdwa.read(); - origSrc1.read(); - - DPRINTF(GCN3, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], " - "DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: %d, " - "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, " - "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", - extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, - extData.iFmt_VOP_SDWA.DST_UNUSED, - extData.iFmt_VOP_SDWA.CLAMP, - extData.iFmt_VOP_SDWA.SRC0_SEL, - extData.iFmt_VOP_SDWA.SRC0_SEXT, - extData.iFmt_VOP_SDWA.SRC0_NEG, - extData.iFmt_VOP_SDWA.SRC0_ABS, - extData.iFmt_VOP_SDWA.SRC1_SEL, - extData.iFmt_VOP_SDWA.SRC1_SEXT, - extData.iFmt_VOP_SDWA.SRC1_NEG, - extData.iFmt_VOP_SDWA.SRC1_ABS); - - processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, - src1, origSrc1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0_sdwa[lane] + src1[lane]; - origVdst[lane] = vdst[lane]; // keep copy consistent - vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane] - + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 
1 : 0); - } - } - - processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - vcc.setBit(lane, ((VecElemU64)src0[lane] - + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0); - } - } - } - - vcc.write(); - vdst.write(); - } - - Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_sub_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - } // Inst_VOP2__V_SUB_U32 - - Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32() - { - } // ~Inst_VOP2__V_SUB_U32 - - // D.u = S0.u - S1.u; - // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or - // carry-out. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair. - void - Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } - - Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subrev_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - } // Inst_VOP2__V_SUBREV_U32 - - Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32() - { - } // ~Inst_VOP2__V_SUBREV_U32 - - // D.u = S1.u - S0.u; - // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or - // carry-out. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair. 
- void - Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } - - Inst_VOP2__V_ADDC_U32::Inst_VOP2__V_ADDC_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_addc_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(ReadsVCC); - } // Inst_VOP2__V_ADDC_U32 - - Inst_VOP2__V_ADDC_U32::~Inst_VOP2__V_ADDC_U32() - { - } // ~Inst_VOP2__V_ADDC_U32 - - // D.u = S0.u + S1.u + VCC[threadId]; - // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0) - // is an UNSIGNED overflow. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC - // source comes from the SGPR-pair at S2.u. - void - Inst_VOP2__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - vcc.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane] - + bits(vcc.rawData(), lane); - vcc.setBit(lane, ((VecElemU64)src0[lane] - + (VecElemU64)src1[lane] - + (VecElemU64)bits(vcc.rawData(), lane, lane)) - >= 0x100000000 ? 
1 : 0); - } - } - - vdst.write(); - vcc.write(); - } - - Inst_VOP2__V_SUBB_U32::Inst_VOP2__V_SUBB_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subb_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(ReadsVCC); - } // Inst_VOP2__V_SUBB_U32 - - Inst_VOP2__V_SUBB_U32::~Inst_VOP2__V_SUBB_U32() - { - } // ~Inst_VOP2__V_SUBB_U32 - - // D.u = S0.u - S1.u - VCC[threadId]; - // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED - // overflow. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC - // source comes from the SGPR-pair at S2.u. - void - Inst_VOP2__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - vcc.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] - = src0[lane] - src1[lane] - bits(vcc.rawData(), lane); - vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) - > src0[lane] ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } - - Inst_VOP2__V_SUBBREV_U32::Inst_VOP2__V_SUBBREV_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subbrev_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(ReadsVCC); - } // Inst_VOP2__V_SUBBREV_U32 - - Inst_VOP2__V_SUBBREV_U32::~Inst_VOP2__V_SUBBREV_U32() - { - } // ~Inst_VOP2__V_SUBBREV_U32 - - // D.u = S1.u - S0.u - VCC[threadId]; - // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED - // overflow. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC - // source comes from the SGPR-pair at S2.u. 
- void - Inst_VOP2__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - vcc.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] - = src1[lane] - src0[lane] - bits(vcc.rawData(), lane); - vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane)) - > src1[lane] ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } - - Inst_VOP2__V_ADD_F16::Inst_VOP2__V_ADD_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_add_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_ADD_F16 - - Inst_VOP2__V_ADD_F16::~Inst_VOP2__V_ADD_F16() - { - } // ~Inst_VOP2__V_ADD_F16 - - // D.f16 = S0.f16 + S1.f16. - void - Inst_VOP2__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP2__V_SUB_F16::Inst_VOP2__V_SUB_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_sub_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_SUB_F16 - - Inst_VOP2__V_SUB_F16::~Inst_VOP2__V_SUB_F16() - { - } // ~Inst_VOP2__V_SUB_F16 - - // D.f16 = S0.f16 - S1.f16. - void - Inst_VOP2__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP2__V_SUBREV_F16::Inst_VOP2__V_SUBREV_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subrev_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_SUBREV_F16 - - Inst_VOP2__V_SUBREV_F16::~Inst_VOP2__V_SUBREV_F16() - { - } // ~Inst_VOP2__V_SUBREV_F16 - - // D.f16 = S1.f16 - S0.f16. 
- void - Inst_VOP2__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP2__V_MUL_F16::Inst_VOP2__V_MUL_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_MUL_F16 - - Inst_VOP2__V_MUL_F16::~Inst_VOP2__V_MUL_F16() - { - } // ~Inst_VOP2__V_MUL_F16 - - // D.f16 = S0.f16 * S1.f16. - void - Inst_VOP2__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP2__V_MAC_F16::Inst_VOP2__V_MAC_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mac_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(MAC); - } // Inst_VOP2__V_MAC_F16 - - Inst_VOP2__V_MAC_F16::~Inst_VOP2__V_MAC_F16() - { - } // ~Inst_VOP2__V_MAC_F16 - - // D.f16 = S0.f16 * S1.f16 + D.f16. - void - Inst_VOP2__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP2__V_MADMK_F16::Inst_VOP2__V_MADMK_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_madmk_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(MAD); - } // Inst_VOP2__V_MADMK_F16 - - Inst_VOP2__V_MADMK_F16::~Inst_VOP2__V_MADMK_F16() - { - } // ~Inst_VOP2__V_MADMK_F16 - - // D.f16 = S0.f16 * K.f16 + S1.f16; K is a 16-bit inline constant stored - // in the following literal DWORD. - // This opcode cannot use the VOP3 encoding and cannot use input/output - // modifiers. - void - Inst_VOP2__V_MADMK_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP2__V_MADAK_F16::Inst_VOP2__V_MADAK_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_madak_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(MAD); - } // Inst_VOP2__V_MADAK_F16 - - Inst_VOP2__V_MADAK_F16::~Inst_VOP2__V_MADAK_F16() - { - } // ~Inst_VOP2__V_MADAK_F16 - - // D.f16 = S0.f16 * S1.f16 + K.f16; K is a 16-bit inline constant stored - // in the following literal DWORD. - // This opcode cannot use the VOP3 encoding and cannot use input/output - // modifiers. 
- void - Inst_VOP2__V_MADAK_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP2__V_ADD_U16::Inst_VOP2__V_ADD_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_add_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_ADD_U16 - - Inst_VOP2__V_ADD_U16::~Inst_VOP2__V_ADD_U16() - { - } // ~Inst_VOP2__V_ADD_U16 - - // D.u16 = S0.u16 + S1.u16. - void - Inst_VOP2__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - } - } - - vdst.write(); - } - - Inst_VOP2__V_SUB_U16::Inst_VOP2__V_SUB_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_sub_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_SUB_U16 - - Inst_VOP2__V_SUB_U16::~Inst_VOP2__V_SUB_U16() - { - } // ~Inst_VOP2__V_SUB_U16 - - // D.u16 = S0.u16 - S1.u16. - void - Inst_VOP2__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - } - } - - vdst.write(); - } - - Inst_VOP2__V_SUBREV_U16::Inst_VOP2__V_SUBREV_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subrev_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_SUBREV_U16 - - Inst_VOP2__V_SUBREV_U16::~Inst_VOP2__V_SUBREV_U16() - { - } // ~Inst_VOP2__V_SUBREV_U16 - - // D.u16 = S1.u16 - S0.u16. 
- void - Inst_VOP2__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - } - } - - vdst.write(); - } - - Inst_VOP2__V_MUL_LO_U16::Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_lo_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_MUL_LO_U16 - - Inst_VOP2__V_MUL_LO_U16::~Inst_VOP2__V_MUL_LO_U16() - { - } // ~Inst_VOP2__V_MUL_LO_U16 - - // D.u16 = S0.u16 * S1.u16. - void - Inst_VOP2__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] * src1[lane]; - } - } - - vdst.write(); - } - - Inst_VOP2__V_LSHLREV_B16::Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_lshlrev_b16") - { - setFlag(ALU); - } // Inst_VOP2__V_LSHLREV_B16 - - Inst_VOP2__V_LSHLREV_B16::~Inst_VOP2__V_LSHLREV_B16() - { - } // ~Inst_VOP2__V_LSHLREV_B16 - - // D.u[15:0] = S1.u[15:0] << S0.u[3:0]. 
- void - Inst_VOP2__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] << bits(src0[lane], 3, 0); - } - } - - vdst.write(); - } - - Inst_VOP2__V_LSHRREV_B16::Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_lshrrev_b16") - { - setFlag(ALU); - } // Inst_VOP2__V_LSHRREV_B16 - - Inst_VOP2__V_LSHRREV_B16::~Inst_VOP2__V_LSHRREV_B16() - { - } // ~Inst_VOP2__V_LSHRREV_B16 - - // D.u[15:0] = S1.u[15:0] >> S0.u[3:0]. - // The vacated bits are set to zero. - void - Inst_VOP2__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> src0[lane]; - } - } - - vdst.write(); - } - - Inst_VOP2__V_ASHRREV_I16::Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_ashrrev_i16") - { - setFlag(ALU); - } // Inst_VOP2__V_ASHRREV_I16 - - Inst_VOP2__V_ASHRREV_I16::~Inst_VOP2__V_ASHRREV_I16() - { - } // ~Inst_VOP2__V_ASHRREV_I16 - - // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0]. - // The vacated bits are set to the sign bit of the input value. 
- void - Inst_VOP2__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> src0[lane]; - } - } - - vdst.write(); - } - - Inst_VOP2__V_MAX_F16::Inst_VOP2__V_MAX_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_MAX_F16 - - Inst_VOP2__V_MAX_F16::~Inst_VOP2__V_MAX_F16() - { - } // ~Inst_VOP2__V_MAX_F16 - - // D.f16 = max(S0.f16, S1.f16). - void - Inst_VOP2__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP2__V_MIN_F16::Inst_VOP2__V_MIN_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_MIN_F16 - - Inst_VOP2__V_MIN_F16::~Inst_VOP2__V_MIN_F16() - { - } // ~Inst_VOP2__V_MIN_F16 - - // D.f16 = min(S0.f16, S1.f16). - void - Inst_VOP2__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP2__V_MAX_U16::Inst_VOP2__V_MAX_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_MAX_U16 - - Inst_VOP2__V_MAX_U16::~Inst_VOP2__V_MAX_U16() - { - } // ~Inst_VOP2__V_MAX_U16 - - // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]). 
- void - Inst_VOP2__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP2__V_MAX_I16::Inst_VOP2__V_MAX_I16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_i16") - { - setFlag(ALU); - } // Inst_VOP2__V_MAX_I16 - - Inst_VOP2__V_MAX_I16::~Inst_VOP2__V_MAX_I16() - { - } // ~Inst_VOP2__V_MAX_I16 - - // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]). - void - Inst_VOP2__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP2__V_MIN_U16::Inst_VOP2__V_MIN_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_MIN_U16 - - Inst_VOP2__V_MIN_U16::~Inst_VOP2__V_MIN_U16() - { - } // ~Inst_VOP2__V_MIN_U16 - - // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]). 
- void - Inst_VOP2__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP2__V_MIN_I16::Inst_VOP2__V_MIN_I16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_i16") - { - setFlag(ALU); - } // Inst_VOP2__V_MIN_I16 - - Inst_VOP2__V_MIN_I16::~Inst_VOP2__V_MIN_I16() - { - } // ~Inst_VOP2__V_MIN_I16 - - // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]). - void - Inst_VOP2__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP2__V_LDEXP_F16::Inst_VOP2__V_LDEXP_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_ldexp_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_LDEXP_F16 - - Inst_VOP2__V_LDEXP_F16::~Inst_VOP2__V_LDEXP_F16() - { - } // ~Inst_VOP2__V_LDEXP_F16 - - // D.f16 = S0.f16 * (2 ** S1.i16). - void - Inst_VOP2__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_nop") - { - setFlag(Nop); - setFlag(ALU); - } // Inst_VOP1__V_NOP - - Inst_VOP1__V_NOP::~Inst_VOP1__V_NOP() - { - } // ~Inst_VOP1__V_NOP - - // Do nothing. 
- void - Inst_VOP1__V_NOP::execute(GPUDynInstPtr gpuDynInst) - { - } - - Inst_VOP1__V_MOV_B32::Inst_VOP1__V_MOV_B32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_mov_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_MOV_B32 - - Inst_VOP1__V_MOV_B32::~Inst_VOP1__V_MOV_B32() - { - } // ~Inst_VOP1__V_MOV_B32 - - // D.u = S0.u. - // Input and output modifiers not supported; this is an untyped operation. - void - Inst_VOP1__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (isDPPInst()) { - VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); - src_dpp.read(); - - DPRINTF(GCN3, "Handling V_MOV_B32 SRC DPP. SRC0: register v[%d], " - "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " - "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, " - "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, - extData.iFmt_VOP_DPP.DPP_CTRL, - extData.iFmt_VOP_DPP.SRC0_ABS, - extData.iFmt_VOP_DPP.SRC0_NEG, - extData.iFmt_VOP_DPP.SRC1_ABS, - extData.iFmt_VOP_DPP.SRC1_NEG, - extData.iFmt_VOP_DPP.BOUND_CTRL, - extData.iFmt_VOP_DPP.BANK_MASK, - extData.iFmt_VOP_DPP.ROW_MASK); - - // NOTE: For VOP1, there is no SRC1, so make sure we're not trying - // to negate it or take the absolute value of it - assert(!extData.iFmt_VOP_DPP.SRC1_ABS); - assert(!extData.iFmt_VOP_DPP.SRC1_NEG); - processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src_dpp[lane]; - } - } - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src[lane]; - } - } - } - - vdst.write(); - } - - Inst_VOP1__V_READFIRSTLANE_B32::Inst_VOP1__V_READFIRSTLANE_B32( - InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_readfirstlane_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_READFIRSTLANE_B32 - - 
Inst_VOP1__V_READFIRSTLANE_B32::~Inst_VOP1__V_READFIRSTLANE_B32() - { - } // ~Inst_VOP1__V_READFIRSTLANE_B32 - - // Copy one VGPR value to one SGPR. D = SGPR destination, S0 = source data - // (VGPR# or M0 for lds direct access), Lane# = FindFirst1fromLSB(exec) - // (Lane# = 0 if exec is zero). Ignores exec mask for the access. - // Input and output modifiers not supported; this is an untyped operation. - void - Inst_VOP1__V_READFIRSTLANE_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarRegI32 src_lane(0); - ScalarRegU64 exec_mask = wf->execMask().to_ullong(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (exec_mask) { - src_lane = findLsbSet(exec_mask); - } - - sdst = src[src_lane]; - - sdst.write(); - } - - Inst_VOP1__V_CVT_I32_F64::Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_i32_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CVT_I32_F64 - - Inst_VOP1__V_CVT_I32_F64::~Inst_VOP1__V_CVT_I32_F64() - { - } // ~Inst_VOP1__V_CVT_I32_F64 - - // D.i = (int)S0.d. - // Out-of-range floating point values (including infinity) saturate. NaN - // is converted to 0. 
- void - Inst_VOP1__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp; - std::frexp(src[lane],&exp); - if (std::isnan(src[lane])) { - vdst[lane] = 0; - } else if (std::isinf(src[lane]) || exp > 30) { - if (std::signbit(src[lane])) { - vdst[lane] = INT_MIN; - } else { - vdst[lane] = INT_MAX; - } - } else { - vdst[lane] = (VecElemI32)src[lane]; - } - } - } - - vdst.write(); - } - - Inst_VOP1__V_CVT_F64_I32::Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f64_i32") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CVT_F64_I32 - - Inst_VOP1__V_CVT_F64_I32::~Inst_VOP1__V_CVT_F64_I32() - { - } // ~Inst_VOP1__V_CVT_F64_I32 - - // D.d = (double)S0.i. - void - Inst_VOP1__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF64)src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP1__V_CVT_F32_I32::Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_i32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_I32 - - Inst_VOP1__V_CVT_F32_I32::~Inst_VOP1__V_CVT_F32_I32() - { - } // ~Inst_VOP1__V_CVT_F32_I32 - - // D.f = (float)S0.i. 
- void - Inst_VOP1__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP1__V_CVT_F32_U32::Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_u32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_U32 - - Inst_VOP1__V_CVT_F32_U32::~Inst_VOP1__V_CVT_F32_U32() - { - } // ~Inst_VOP1__V_CVT_F32_U32 - - // D.f = (float)S0.u. - void - Inst_VOP1__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP1__V_CVT_U32_F32::Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_u32_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_U32_F32 - - Inst_VOP1__V_CVT_U32_F32::~Inst_VOP1__V_CVT_U32_F32() - { - } // ~Inst_VOP1__V_CVT_U32_F32 - - // D.u = (unsigned)S0.f. - // Out-of-range floating point values (including infinity) saturate. NaN - // is converted to 0. 
- void - Inst_VOP1__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp; - std::frexp(src[lane],&exp); - if (std::isnan(src[lane])) { - vdst[lane] = 0; - } else if (std::isinf(src[lane])) { - if (std::signbit(src[lane])) { - vdst[lane] = 0; - } else { - vdst[lane] = UINT_MAX; - } - } else if (exp > 31) { - vdst[lane] = UINT_MAX; - } else { - vdst[lane] = (VecElemU32)src[lane]; - } - } - } - - vdst.write(); - } - - Inst_VOP1__V_CVT_I32_F32::Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_i32_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_I32_F32 - - Inst_VOP1__V_CVT_I32_F32::~Inst_VOP1__V_CVT_I32_F32() - { - } // ~Inst_VOP1__V_CVT_I32_F32 - - // D.i = (int)S0.f. - // Out-of-range floating point values (including infinity) saturate. NaN - // is converted to 0. 
- void - Inst_VOP1__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp; - std::frexp(src[lane],&exp); - if (std::isnan(src[lane])) { - vdst[lane] = 0; - } else if (std::isinf(src[lane]) || exp > 30) { - if (std::signbit(src[lane])) { - vdst[lane] = INT_MIN; - } else { - vdst[lane] = INT_MAX; - } - } else { - vdst[lane] = (VecElemI32)src[lane]; - } - } - } - - vdst.write(); - } - - Inst_VOP1__V_MOV_FED_B32::Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_mov_fed_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_MOV_FED_B32 - - Inst_VOP1__V_MOV_FED_B32::~Inst_VOP1__V_MOV_FED_B32() - { - } // ~Inst_VOP1__V_MOV_FED_B32 - - // D.u = S0.u; - // Input and output modifiers not supported; this is an untyped operation. - void - Inst_VOP1__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_CVT_F16_F32::Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f16_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F16_F32 - - Inst_VOP1__V_CVT_F16_F32::~Inst_VOP1__V_CVT_F16_F32() - { - } // ~Inst_VOP1__V_CVT_F16_F32 - - // D.f16 = flt32_to_flt16(S0.f). - void - Inst_VOP1__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_CVT_F32_F16::Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_f16") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_F16 - - Inst_VOP1__V_CVT_F32_F16::~Inst_VOP1__V_CVT_F32_F16() - { - } // ~Inst_VOP1__V_CVT_F32_F16 - - // D.f = flt16_to_flt32(S0.f16). 
- void - Inst_VOP1__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_CVT_RPI_I32_F32::Inst_VOP1__V_CVT_RPI_I32_F32( - InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_rpi_i32_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_RPI_I32_F32 - - Inst_VOP1__V_CVT_RPI_I32_F32::~Inst_VOP1__V_CVT_RPI_I32_F32() - { - } // ~Inst_VOP1__V_CVT_RPI_I32_F32 - - // D.i = (int)floor(S0.f + 0.5). - void - Inst_VOP1__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5); - } - } - - vdst.write(); - } - - Inst_VOP1__V_CVT_FLR_I32_F32::Inst_VOP1__V_CVT_FLR_I32_F32( - InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_flr_i32_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_FLR_I32_F32 - - Inst_VOP1__V_CVT_FLR_I32_F32::~Inst_VOP1__V_CVT_FLR_I32_F32() - { - } // ~Inst_VOP1__V_CVT_FLR_I32_F32 - - // D.i = (int)floor(S0.f). - void - Inst_VOP1__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemI32)std::floor(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_CVT_OFF_F32_I4::Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_off_f32_i4") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_OFF_F32_I4 - - Inst_VOP1__V_CVT_OFF_F32_I4::~Inst_VOP1__V_CVT_OFF_F32_I4() - { - } // ~Inst_VOP1__V_CVT_OFF_F32_I4 - - // 4-bit signed int to 32-bit float. 
- void - Inst_VOP1__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_CVT_F32_F64::Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CVT_F32_F64 - - Inst_VOP1__V_CVT_F32_F64::~Inst_VOP1__V_CVT_F32_F64() - { - } // ~Inst_VOP1__V_CVT_F32_F64 - - // D.f = (float)S0.d. - void - Inst_VOP1__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP1__V_CVT_F64_F32::Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f64_f32") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CVT_F64_F32 - - Inst_VOP1__V_CVT_F64_F32::~Inst_VOP1__V_CVT_F64_F32() - { - } // ~Inst_VOP1__V_CVT_F64_F32 - - // D.d = (double)S0.f. - void - Inst_VOP1__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF64)src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP1__V_CVT_F32_UBYTE0::Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_ubyte0") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_UBYTE0 - - Inst_VOP1__V_CVT_F32_UBYTE0::~Inst_VOP1__V_CVT_F32_UBYTE0() - { - } // ~Inst_VOP1__V_CVT_F32_UBYTE0 - - // D.f = (float)(S0.u[7:0]). 
- void - Inst_VOP1__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0)); - } - } - - vdst.write(); - } - - Inst_VOP1__V_CVT_F32_UBYTE1::Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_ubyte1") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_UBYTE1 - - Inst_VOP1__V_CVT_F32_UBYTE1::~Inst_VOP1__V_CVT_F32_UBYTE1() - { - } // ~Inst_VOP1__V_CVT_F32_UBYTE1 - - // D.f = (float)(S0.u[15:8]). - void - Inst_VOP1__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8)); - } - } - - vdst.write(); - } - - Inst_VOP1__V_CVT_F32_UBYTE2::Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_ubyte2") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_UBYTE2 - - Inst_VOP1__V_CVT_F32_UBYTE2::~Inst_VOP1__V_CVT_F32_UBYTE2() - { - } // ~Inst_VOP1__V_CVT_F32_UBYTE2 - - // D.f = (float)(S0.u[23:16]). 
- void - Inst_VOP1__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16)); - } - } - - vdst.write(); - } - - Inst_VOP1__V_CVT_F32_UBYTE3::Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_ubyte3") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_UBYTE3 - - Inst_VOP1__V_CVT_F32_UBYTE3::~Inst_VOP1__V_CVT_F32_UBYTE3() - { - } // ~Inst_VOP1__V_CVT_F32_UBYTE3 - - // D.f = (float)(S0.u[31:24]). - void - Inst_VOP1__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24)); - } - } - - vdst.write(); - } - - Inst_VOP1__V_CVT_U32_F64::Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_u32_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CVT_U32_F64 - - Inst_VOP1__V_CVT_U32_F64::~Inst_VOP1__V_CVT_U32_F64() - { - } // ~Inst_VOP1__V_CVT_U32_F64 - - // D.u = (unsigned)S0.d. - // Out-of-range floating point values (including infinity) saturate. NaN - // is converted to 0. 
- void - Inst_VOP1__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp; - std::frexp(src[lane],&exp); - if (std::isnan(src[lane])) { - vdst[lane] = 0; - } else if (std::isinf(src[lane])) { - if (std::signbit(src[lane])) { - vdst[lane] = 0; - } else { - vdst[lane] = UINT_MAX; - } - } else if (exp > 31) { - vdst[lane] = UINT_MAX; - } else { - vdst[lane] = (VecElemU32)src[lane]; - } - } - } - - vdst.write(); - } - - Inst_VOP1__V_CVT_F64_U32::Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f64_u32") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CVT_F64_U32 - - Inst_VOP1__V_CVT_F64_U32::~Inst_VOP1__V_CVT_F64_U32() - { - } // ~Inst_VOP1__V_CVT_F64_U32 - - // D.d = (double)S0.u. - void - Inst_VOP1__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF64)src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP1__V_TRUNC_F64::Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_trunc_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_TRUNC_F64 - - Inst_VOP1__V_TRUNC_F64::~Inst_VOP1__V_TRUNC_F64() - { - } // ~Inst_VOP1__V_TRUNC_F64 - - // D.d = trunc(S0.d), return integer part of S0.d. 
- void - Inst_VOP1__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::trunc(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_CEIL_F64::Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_ceil_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CEIL_F64 - - Inst_VOP1__V_CEIL_F64::~Inst_VOP1__V_CEIL_F64() - { - } // ~Inst_VOP1__V_CEIL_F64 - - // D.d = ceil(S0.d); - void - Inst_VOP1__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::ceil(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_RNDNE_F64::Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rndne_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_RNDNE_F64 - - Inst_VOP1__V_RNDNE_F64::~Inst_VOP1__V_RNDNE_F64() - { - } // ~Inst_VOP1__V_RNDNE_F64 - - // D.d = round_nearest_even(S0.d). 
- void - Inst_VOP1__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = roundNearestEven(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_FLOOR_F64::Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_floor_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_FLOOR_F64 - - Inst_VOP1__V_FLOOR_F64::~Inst_VOP1__V_FLOOR_F64() - { - } // ~Inst_VOP1__V_FLOOR_F64 - - // D.d = floor(S0.d); - void - Inst_VOP1__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::floor(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_FRACT_F32::Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_fract_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_FRACT_F32 - - Inst_VOP1__V_FRACT_F32::~Inst_VOP1__V_FRACT_F32() - { - } // ~Inst_VOP1__V_FRACT_F32 - - // D.f = modf(S0.f). 
- void - Inst_VOP1__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF32 int_part(0.0); - vdst[lane] = std::modf(src[lane], &int_part); - } - } - - vdst.write(); - } - - Inst_VOP1__V_TRUNC_F32::Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_trunc_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_TRUNC_F32 - - Inst_VOP1__V_TRUNC_F32::~Inst_VOP1__V_TRUNC_F32() - { - } // ~Inst_VOP1__V_TRUNC_F32 - - // D.f = trunc(S0.f), return integer part of S0.f. - void - Inst_VOP1__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst (gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::trunc(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_CEIL_F32::Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_ceil_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CEIL_F32 - - Inst_VOP1__V_CEIL_F32::~Inst_VOP1__V_CEIL_F32() - { - } // ~Inst_VOP1__V_CEIL_F32 - - // D.f = ceil(S0.f); - void - Inst_VOP1__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::ceil(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_RNDNE_F32::Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rndne_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_RNDNE_F32 - - 
Inst_VOP1__V_RNDNE_F32::~Inst_VOP1__V_RNDNE_F32() - { - } // ~Inst_VOP1__V_RNDNE_F32 - - // D.f = round_nearest_even(S0.f). - void - Inst_VOP1__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = roundNearestEven(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_FLOOR_F32::Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_floor_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_FLOOR_F32 - - Inst_VOP1__V_FLOOR_F32::~Inst_VOP1__V_FLOOR_F32() - { - } // ~Inst_VOP1__V_FLOOR_F32 - - // D.f = floor(S0.f); - void - Inst_VOP1__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::floor(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_EXP_F32::Inst_VOP1__V_EXP_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_exp_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_EXP_F32 - - Inst_VOP1__V_EXP_F32::~Inst_VOP1__V_EXP_F32() - { - } // ~Inst_VOP1__V_EXP_F32 - - // D.f = pow(2.0, S0.f). 
- void - Inst_VOP1__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::pow(2.0, src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_LOG_F32::Inst_VOP1__V_LOG_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_log_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_LOG_F32 - - Inst_VOP1__V_LOG_F32::~Inst_VOP1__V_LOG_F32() - { - } // ~Inst_VOP1__V_LOG_F32 - - // D.f = log2(S0.f). - void - Inst_VOP1__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::log2(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_RCP_F32::Inst_VOP1__V_RCP_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rcp_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_RCP_F32 - - Inst_VOP1__V_RCP_F32::~Inst_VOP1__V_RCP_F32() - { - } // ~Inst_VOP1__V_RCP_F32 - - // D.f = 1.0 / S0.f. 
- void - Inst_VOP1__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = 1.0 / src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP1__V_RCP_IFLAG_F32::Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rcp_iflag_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_RCP_IFLAG_F32 - - Inst_VOP1__V_RCP_IFLAG_F32::~Inst_VOP1__V_RCP_IFLAG_F32() - { - } // ~Inst_VOP1__V_RCP_IFLAG_F32 - - // D.f = 1.0 / S0.f. - void - Inst_VOP1__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = 1.0 / src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP1__V_RSQ_F32::Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rsq_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_RSQ_F32 - - Inst_VOP1__V_RSQ_F32::~Inst_VOP1__V_RSQ_F32() - { - } // ~Inst_VOP1__V_RSQ_F32 - - // D.f = 1.0 / sqrt(S0.f). 
- void - Inst_VOP1__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = 1.0 / std::sqrt(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_RCP_F64::Inst_VOP1__V_RCP_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rcp_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_RCP_F64 - - Inst_VOP1__V_RCP_F64::~Inst_VOP1__V_RCP_F64() - { - } // ~Inst_VOP1__V_RCP_F64 - - // D.d = 1.0 / S0.d. - void - Inst_VOP1__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::fpclassify(src[lane]) == FP_ZERO) { - vdst[lane] = +INFINITY; - } else if (std::isnan(src[lane])) { - vdst[lane] = NAN; - } else if (std::isinf(src[lane])) { - if (std::signbit(src[lane])) { - vdst[lane] = -0.0; - } else { - vdst[lane] = 0.0; - } - } else { - vdst[lane] = 1.0 / src[lane]; - } - } - } - - vdst.write(); - } - - Inst_VOP1__V_RSQ_F64::Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rsq_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_RSQ_F64 - - Inst_VOP1__V_RSQ_F64::~Inst_VOP1__V_RSQ_F64() - { - } // ~Inst_VOP1__V_RSQ_F64 - - // D.d = 1.0 / sqrt(S0.d). 
- void - Inst_VOP1__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::fpclassify(src[lane]) == FP_ZERO) { - vdst[lane] = +INFINITY; - } else if (std::isnan(src[lane])) { - vdst[lane] = NAN; - } else if (std::isinf(src[lane]) - && !std::signbit(src[lane])) { - vdst[lane] = 0.0; - } else if (std::signbit(src[lane])) { - vdst[lane] = NAN; - } else { - vdst[lane] = 1.0 / std::sqrt(src[lane]); - } - } - } - - vdst.write(); - } - - Inst_VOP1__V_SQRT_F32::Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_sqrt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_SQRT_F32 - - Inst_VOP1__V_SQRT_F32::~Inst_VOP1__V_SQRT_F32() - { - } // ~Inst_VOP1__V_SQRT_F32 - - // D.f = sqrt(S0.f). - void - Inst_VOP1__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::sqrt(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_SQRT_F64::Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_sqrt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_SQRT_F64 - - Inst_VOP1__V_SQRT_F64::~Inst_VOP1__V_SQRT_F64() - { - } // ~Inst_VOP1__V_SQRT_F64 - - // D.d = sqrt(S0.d). 
- void - Inst_VOP1__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::sqrt(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_SIN_F32::Inst_VOP1__V_SIN_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_sin_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_SIN_F32 - - Inst_VOP1__V_SIN_F32::~Inst_VOP1__V_SIN_F32() - { - } // ~Inst_VOP1__V_SIN_F32 - - // D.f = sin(S0.f * 2 * PI). - void - Inst_VOP1__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - ConstScalarOperandF32 pi(gpuDynInst, REG_PI); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - pi.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (src[lane] < -256.0 || src[lane] > 256.0) { - vdst[lane] = 0.0; - } else { - vdst[lane] = std::sin(src[lane] * 2.0 * pi.rawData()); - } - } - } - - vdst.write(); - } - - Inst_VOP1__V_COS_F32::Inst_VOP1__V_COS_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cos_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_COS_F32 - - Inst_VOP1__V_COS_F32::~Inst_VOP1__V_COS_F32() - { - } // ~Inst_VOP1__V_COS_F32 - - // D.f = cos(S0.f * 2 * PI). 
- void - Inst_VOP1__V_COS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - ConstScalarOperandF32 pi(gpuDynInst, REG_PI); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - pi.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (src[lane] < -256.0 || src[lane] > 256.0) { - vdst[lane] = 0.0; - } else { - vdst[lane] = std::cos(src[lane] * 2.0 * pi.rawData()); - } - } - } - - vdst.write(); - } - - Inst_VOP1__V_NOT_B32::Inst_VOP1__V_NOT_B32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_not_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_NOT_B32 - - Inst_VOP1__V_NOT_B32::~Inst_VOP1__V_NOT_B32() - { - } // ~Inst_VOP1__V_NOT_B32 - - // D.u = ~S0.u. - // Input and output modifiers not supported. - void - Inst_VOP1__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = ~src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP1__V_BFREV_B32::Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_bfrev_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_BFREV_B32 - - Inst_VOP1__V_BFREV_B32::~Inst_VOP1__V_BFREV_B32() - { - } // ~Inst_VOP1__V_BFREV_B32 - - // D.u[31:0] = S0.u[0:31], bitfield reverse. - // Input and output modifiers not supported. 
- void - Inst_VOP1__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = reverseBits(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_FFBH_U32::Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_ffbh_u32") - { - setFlag(ALU); - } // Inst_VOP1__V_FFBH_U32 - - Inst_VOP1__V_FFBH_U32::~Inst_VOP1__V_FFBH_U32() - { - } // ~Inst_VOP1__V_FFBH_U32 - - // D.u = position of first 1 in S0.u from MSB; - // D.u = 0xffffffff if S0.u == 0. - void - Inst_VOP1__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = findFirstOneMsb(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_FFBL_B32::Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_ffbl_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_FFBL_B32 - - Inst_VOP1__V_FFBL_B32::~Inst_VOP1__V_FFBL_B32() - { - } // ~Inst_VOP1__V_FFBL_B32 - - // D.u = position of first 1 in S0.u from LSB; - // D.u = 0xffffffff if S0.u == 0. 
- void - Inst_VOP1__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = findFirstOne(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_FFBH_I32::Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_ffbh_i32") - { - setFlag(ALU); - } // Inst_VOP1__V_FFBH_I32 - - Inst_VOP1__V_FFBH_I32::~Inst_VOP1__V_FFBH_I32() - { - } // ~Inst_VOP1__V_FFBH_I32 - - // D.u = position of first bit different from sign bit in S0.i from MSB; - // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff. - void - Inst_VOP1__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = firstOppositeSignBit(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_FREXP_EXP_I32_F64::Inst_VOP1__V_FREXP_EXP_I32_F64( - InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_frexp_exp_i32_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_FREXP_EXP_I32_F64 - - Inst_VOP1__V_FREXP_EXP_I32_F64::~Inst_VOP1__V_FREXP_EXP_I32_F64() - { - } // ~Inst_VOP1__V_FREXP_EXP_I32_F64 - - void - Inst_VOP1__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isinf(src[lane]) || std::isnan(src[lane])) { - vdst[lane] = 0; - } else { - VecElemI32 exp = 0; - std::frexp(src[lane], &exp); - vdst[lane] = exp; - } - } - } - - 
vdst.write(); - } - - Inst_VOP1__V_FREXP_MANT_F64::Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_frexp_mant_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_FREXP_MANT_F64 - - Inst_VOP1__V_FREXP_MANT_F64::~Inst_VOP1__V_FREXP_MANT_F64() - { - } // ~Inst_VOP1__V_FREXP_MANT_F64 - - void - Inst_VOP1__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isinf(src[lane]) || std::isnan(src[lane])) { - vdst[lane] = src[lane]; - } else { - VecElemI32 exp(0); - vdst[lane] = std::frexp(src[lane], &exp); - } - } - } - - vdst.write(); - } - - Inst_VOP1__V_FRACT_F64::Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_fract_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_FRACT_F64 - - Inst_VOP1__V_FRACT_F64::~Inst_VOP1__V_FRACT_F64() - { - } // ~Inst_VOP1__V_FRACT_F64 - - void - Inst_VOP1__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF64 int_part(0.0); - vdst[lane] = std::modf(src[lane], &int_part); - } - } - - vdst.write(); - } - - Inst_VOP1__V_FREXP_EXP_I32_F32::Inst_VOP1__V_FREXP_EXP_I32_F32( - InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_frexp_exp_i32_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_FREXP_EXP_I32_F32 - - Inst_VOP1__V_FREXP_EXP_I32_F32::~Inst_VOP1__V_FREXP_EXP_I32_F32() - { - } // ~Inst_VOP1__V_FREXP_EXP_I32_F32 - - // frexp(S0.f, Exponent(S0.f)) - // if (S0.f == INF || S0.f == NAN) then D.i = 0; - // else D.i = Exponent(S0.f); - void - 
Inst_VOP1__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isinf(src[lane]) || std::isnan(src[lane])) { - vdst[lane] = 0; - } else { - VecElemI32 exp(0); - std::frexp(src[lane], &exp); - vdst[lane] = exp; - } - } - } - - vdst.write(); - } - - Inst_VOP1__V_FREXP_MANT_F32::Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_frexp_mant_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_FREXP_MANT_F32 - - Inst_VOP1__V_FREXP_MANT_F32::~Inst_VOP1__V_FREXP_MANT_F32() - { - } // ~Inst_VOP1__V_FREXP_MANT_F32 - - // if (S0.f == INF || S0.f == NAN) then D.f = S0.f; - // else D.f = frexp(S0.f, Exponent(S0.f)). - void - Inst_VOP1__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isinf(src[lane]) || std::isnan(src[lane])) { - vdst[lane] = src[lane]; - } else { - VecElemI32 exp(0); - vdst[lane] = std::frexp(src[lane], &exp); - } - } - } - - vdst.write(); - } - - Inst_VOP1__V_CLREXCP::Inst_VOP1__V_CLREXCP(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_clrexcp") - { - setFlag(ALU); - } // Inst_VOP1__V_CLREXCP - - Inst_VOP1__V_CLREXCP::~Inst_VOP1__V_CLREXCP() - { - } // ~Inst_VOP1__V_CLREXCP - - void - Inst_VOP1__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f16_u16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_CVT_F16_U16 - - Inst_VOP1__V_CVT_F16_U16::~Inst_VOP1__V_CVT_F16_U16() - { - 
} // ~Inst_VOP1__V_CVT_F16_U16 - - // D.f16 = uint16_to_flt16(S.u16). - void - Inst_VOP1__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_CVT_F16_I16::Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f16_i16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_CVT_F16_I16 - - Inst_VOP1__V_CVT_F16_I16::~Inst_VOP1__V_CVT_F16_I16() - { - } // ~Inst_VOP1__V_CVT_F16_I16 - - // D.f16 = int16_to_flt16(S.i16). - void - Inst_VOP1__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_CVT_U16_F16::Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_u16_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_CVT_U16_F16 - - Inst_VOP1__V_CVT_U16_F16::~Inst_VOP1__V_CVT_U16_F16() - { - } // ~Inst_VOP1__V_CVT_U16_F16 - - // D.u16 = flt16_to_uint16(S.f16). - void - Inst_VOP1__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_CVT_I16_F16::Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_i16_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_CVT_I16_F16 - - Inst_VOP1__V_CVT_I16_F16::~Inst_VOP1__V_CVT_I16_F16() - { - } // ~Inst_VOP1__V_CVT_I16_F16 - - // D.i16 = flt16_to_int16(S.f16). 
- void - Inst_VOP1__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_RCP_F16::Inst_VOP1__V_RCP_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rcp_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_RCP_F16 - - Inst_VOP1__V_RCP_F16::~Inst_VOP1__V_RCP_F16() - { - } // ~Inst_VOP1__V_RCP_F16 - - // if (S0.f16 == 1.0f) - // D.f16 = 1.0f; - // else - // D.f16 = 1 / S0.f16; - void - Inst_VOP1__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_SQRT_F16::Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_sqrt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_SQRT_F16 - - Inst_VOP1__V_SQRT_F16::~Inst_VOP1__V_SQRT_F16() - { - } // ~Inst_VOP1__V_SQRT_F16 - - // if (S0.f16 == 1.0f) - // D.f16 = 1.0f; - // else - // D.f16 = sqrt(S0.f16); - void - Inst_VOP1__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_RSQ_F16::Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rsq_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_RSQ_F16 - - Inst_VOP1__V_RSQ_F16::~Inst_VOP1__V_RSQ_F16() - { - } // ~Inst_VOP1__V_RSQ_F16 - - // if (S0.f16 == 1.0f) - // D.f16 = 1.0f; - // else - // D.f16 = 1 / sqrt(S0.f16); - void - Inst_VOP1__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_LOG_F16::Inst_VOP1__V_LOG_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_log_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_LOG_F16 - - Inst_VOP1__V_LOG_F16::~Inst_VOP1__V_LOG_F16() - { - } // ~Inst_VOP1__V_LOG_F16 - - // if (S0.f16 == 1.0f) - // D.f16 = 0.0f; - // else - // D.f16 = log2(S0.f16); - void - Inst_VOP1__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_EXP_F16::Inst_VOP1__V_EXP_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_exp_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_EXP_F16 - - 
Inst_VOP1__V_EXP_F16::~Inst_VOP1__V_EXP_F16() - { - } // ~Inst_VOP1__V_EXP_F16 - - // if (S0.f16 == 0.0f) - // D.f16 = 1.0f; - // else - // D.f16 = pow(2.0, S0.f16). - void - Inst_VOP1__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_FREXP_MANT_F16::Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_frexp_mant_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_FREXP_MANT_F16 - - Inst_VOP1__V_FREXP_MANT_F16::~Inst_VOP1__V_FREXP_MANT_F16() - { - } // ~Inst_VOP1__V_FREXP_MANT_F16 - - // if (S0.f16 == +-INF || S0.f16 == NAN) - // D.f16 = S0.f16; - // else - // D.f16 = mantissa(S0.f16). - void - Inst_VOP1__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_FREXP_EXP_I16_F16::Inst_VOP1__V_FREXP_EXP_I16_F16( - InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_frexp_exp_i16_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_FREXP_EXP_I16_F16 - - Inst_VOP1__V_FREXP_EXP_I16_F16::~Inst_VOP1__V_FREXP_EXP_I16_F16() - { - } // ~Inst_VOP1__V_FREXP_EXP_I16_F16 - - // frexp(S0.f16, Exponent(S0.f16)) - // if (S0.f16 == +-INF || S0.f16 == NAN) - // D.i16 = 0; - // else - // D.i16 = Exponent(S0.f16); - void - Inst_VOP1__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_FLOOR_F16::Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_floor_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_FLOOR_F16 - - Inst_VOP1__V_FLOOR_F16::~Inst_VOP1__V_FLOOR_F16() - { - } // ~Inst_VOP1__V_FLOOR_F16 - - // D.f16 = floor(S0.f16); - void - Inst_VOP1__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_CEIL_F16::Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_ceil_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_CEIL_F16 - - Inst_VOP1__V_CEIL_F16::~Inst_VOP1__V_CEIL_F16() - { - } // ~Inst_VOP1__V_CEIL_F16 - - // D.f16 = 
ceil(S0.f16); - void - Inst_VOP1__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_TRUNC_F16::Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_trunc_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_TRUNC_F16 - - Inst_VOP1__V_TRUNC_F16::~Inst_VOP1__V_TRUNC_F16() - { - } // ~Inst_VOP1__V_TRUNC_F16 - - // D.f16 = trunc(S0.f16). - void - Inst_VOP1__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_RNDNE_F16::Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rndne_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_RNDNE_F16 - - Inst_VOP1__V_RNDNE_F16::~Inst_VOP1__V_RNDNE_F16() - { - } // ~Inst_VOP1__V_RNDNE_F16 - - // D.f16 = roundNearestEven(S0.f16); - void - Inst_VOP1__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_FRACT_F16::Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_fract_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_FRACT_F16 - - Inst_VOP1__V_FRACT_F16::~Inst_VOP1__V_FRACT_F16() - { - } // ~Inst_VOP1__V_FRACT_F16 - - // D.f16 = S0.f16 + -floor(S0.f16). - void - Inst_VOP1__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_SIN_F16::Inst_VOP1__V_SIN_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_sin_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_SIN_F16 - - Inst_VOP1__V_SIN_F16::~Inst_VOP1__V_SIN_F16() - { - } // ~Inst_VOP1__V_SIN_F16 - - // D.f16 = sin(S0.f16 * 2 * PI). - void - Inst_VOP1__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_COS_F16::Inst_VOP1__V_COS_F16(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cos_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP1__V_COS_F16 - - Inst_VOP1__V_COS_F16::~Inst_VOP1__V_COS_F16() - { - } // ~Inst_VOP1__V_COS_F16 - - // D.f16 = cos(S0.f16 * 2 * PI). 
- void - Inst_VOP1__V_COS_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP1__V_EXP_LEGACY_F32::Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_exp_legacy_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_EXP_LEGACY_F32 - - Inst_VOP1__V_EXP_LEGACY_F32::~Inst_VOP1__V_EXP_LEGACY_F32() - { - } // ~Inst_VOP1__V_EXP_LEGACY_F32 - - // D.f = pow(2.0, S0.f) - void - Inst_VOP1__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::pow(2.0, src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP1__V_LOG_LEGACY_F32::Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_log_legacy_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_LOG_LEGACY_F32 - - Inst_VOP1__V_LOG_LEGACY_F32::~Inst_VOP1__V_LOG_LEGACY_F32() - { - } // ~Inst_VOP1__V_LOG_LEGACY_F32 - - // D.f = log2(S0.f). 
- void - Inst_VOP1__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::log2(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOPC__V_CMP_CLASS_F32::Inst_VOPC__V_CMP_CLASS_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_class_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_CLASS_F32 - - Inst_VOPC__V_CMP_CLASS_F32::~Inst_VOPC__V_CMP_CLASS_F32() - { - } // ~Inst_VOPC__V_CMP_CLASS_F32 - - // VCC = IEEE numeric class function specified in S1.u, performed on S0.f - // The function reports true if the floating point value is any of the - // numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. 
- void - Inst_VOPC__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMPX_CLASS_F32::Inst_VOPC__V_CMPX_CLASS_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_class_f32") - { - setFlag(ALU); - 
setFlag(F32); - } // Inst_VOPC__V_CMPX_CLASS_F32 - - Inst_VOPC__V_CMPX_CLASS_F32::~Inst_VOPC__V_CMPX_CLASS_F32() - { - } // ~Inst_VOPC__V_CMPX_CLASS_F32 - - // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on - // S0.f The function reports true if the floating point value is any of - // the numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. - void - Inst_VOPC__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - 
continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMP_CLASS_F64::Inst_VOPC__V_CMP_CLASS_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_class_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_CLASS_F64 - - Inst_VOPC__V_CMP_CLASS_F64::~Inst_VOPC__V_CMP_CLASS_F64() - { - } // ~Inst_VOPC__V_CMP_CLASS_F64 - - // VCC = IEEE numeric class function specified in S1.u, performed on S0.d - // The function reports true if the floating point value is any of the - // numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. 
- void - Inst_VOPC__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMPX_CLASS_F64::Inst_VOPC__V_CMPX_CLASS_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_class_f64") - { - setFlag(ALU); - 
setFlag(F64); - } // Inst_VOPC__V_CMPX_CLASS_F64 - - Inst_VOPC__V_CMPX_CLASS_F64::~Inst_VOPC__V_CMPX_CLASS_F64() - { - } // ~Inst_VOPC__V_CMPX_CLASS_F64 - - // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on - // S0.d The function reports true if the floating point value is any of - // the numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. - void - Inst_VOPC__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - 
continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMP_CLASS_F16::Inst_VOPC__V_CMP_CLASS_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_class_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_CLASS_F16 - - Inst_VOPC__V_CMP_CLASS_F16::~Inst_VOPC__V_CMP_CLASS_F16() - { - } // ~Inst_VOPC__V_CMP_CLASS_F16 - - // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16 - // The function reports true if the floating point value is any of the - // numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. 
- void - Inst_VOPC__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_CLASS_F16::Inst_VOPC__V_CMPX_CLASS_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_class_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_CLASS_F16 - - Inst_VOPC__V_CMPX_CLASS_F16::~Inst_VOPC__V_CMPX_CLASS_F16() - { - } // ~Inst_VOPC__V_CMPX_CLASS_F16 - - // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on - // S0.f16 - // The function reports true if the floating point value is any of the - // numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. - void - Inst_VOPC__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_F_F16::Inst_VOPC__V_CMP_F_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_F_F16 - - Inst_VOPC__V_CMP_F_F16::~Inst_VOPC__V_CMP_F_F16() - { - } // ~Inst_VOPC__V_CMP_F_F16 - - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_LT_F16::Inst_VOPC__V_CMP_LT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_LT_F16 - - Inst_VOPC__V_CMP_LT_F16::~Inst_VOPC__V_CMP_LT_F16() - { - } // ~Inst_VOPC__V_CMP_LT_F16 - - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_EQ_F16::Inst_VOPC__V_CMP_EQ_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_EQ_F16 - - Inst_VOPC__V_CMP_EQ_F16::~Inst_VOPC__V_CMP_EQ_F16() - { - } // ~Inst_VOPC__V_CMP_EQ_F16 - - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_LE_F16::Inst_VOPC__V_CMP_LE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_LE_F16 - - Inst_VOPC__V_CMP_LE_F16::~Inst_VOPC__V_CMP_LE_F16() - { - } // ~Inst_VOPC__V_CMP_LE_F16 - - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_GT_F16::Inst_VOPC__V_CMP_GT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_GT_F16 - - Inst_VOPC__V_CMP_GT_F16::~Inst_VOPC__V_CMP_GT_F16() - { - } // ~Inst_VOPC__V_CMP_GT_F16 - - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_LG_F16::Inst_VOPC__V_CMP_LG_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lg_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_LG_F16 - - Inst_VOPC__V_CMP_LG_F16::~Inst_VOPC__V_CMP_LG_F16() - { - } // ~Inst_VOPC__V_CMP_LG_F16 - - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_GE_F16::Inst_VOPC__V_CMP_GE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_GE_F16 - - Inst_VOPC__V_CMP_GE_F16::~Inst_VOPC__V_CMP_GE_F16() - { - } // ~Inst_VOPC__V_CMP_GE_F16 - - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_O_F16::Inst_VOPC__V_CMP_O_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_o_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_O_F16 - - Inst_VOPC__V_CMP_O_F16::~Inst_VOPC__V_CMP_O_F16() - { - } // ~Inst_VOPC__V_CMP_O_F16 - - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_U_F16::Inst_VOPC__V_CMP_U_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_u_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_U_F16 - - Inst_VOPC__V_CMP_U_F16::~Inst_VOPC__V_CMP_U_F16() - { - } // ~Inst_VOPC__V_CMP_U_F16 - - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_NGE_F16::Inst_VOPC__V_CMP_NGE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nge_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NGE_F16 - - Inst_VOPC__V_CMP_NGE_F16::~Inst_VOPC__V_CMP_NGE_F16() - { - } // ~Inst_VOPC__V_CMP_NGE_F16 - - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_NLG_F16::Inst_VOPC__V_CMP_NLG_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlg_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NLG_F16 - - Inst_VOPC__V_CMP_NLG_F16::~Inst_VOPC__V_CMP_NLG_F16() - { - } // ~Inst_VOPC__V_CMP_NLG_F16 - - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_NGT_F16::Inst_VOPC__V_CMP_NGT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ngt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NGT_F16 - - Inst_VOPC__V_CMP_NGT_F16::~Inst_VOPC__V_CMP_NGT_F16() - { - } // ~Inst_VOPC__V_CMP_NGT_F16 - - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_NLE_F16::Inst_VOPC__V_CMP_NLE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nle_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NLE_F16 - - Inst_VOPC__V_CMP_NLE_F16::~Inst_VOPC__V_CMP_NLE_F16() - { - } // ~Inst_VOPC__V_CMP_NLE_F16 - - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_NEQ_F16::Inst_VOPC__V_CMP_NEQ_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_neq_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NEQ_F16 - - Inst_VOPC__V_CMP_NEQ_F16::~Inst_VOPC__V_CMP_NEQ_F16() - { - } // ~Inst_VOPC__V_CMP_NEQ_F16 - - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_NLT_F16::Inst_VOPC__V_CMP_NLT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NLT_F16 - - Inst_VOPC__V_CMP_NLT_F16::~Inst_VOPC__V_CMP_NLT_F16() - { - } // ~Inst_VOPC__V_CMP_NLT_F16 - - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_TRU_F16::Inst_VOPC__V_CMP_TRU_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_tru_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_TRU_F16 - - Inst_VOPC__V_CMP_TRU_F16::~Inst_VOPC__V_CMP_TRU_F16() - { - } // ~Inst_VOPC__V_CMP_TRU_F16 - - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_F_F16::Inst_VOPC__V_CMPX_F_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_F_F16 - - Inst_VOPC__V_CMPX_F_F16::~Inst_VOPC__V_CMPX_F_F16() - { - } // ~Inst_VOPC__V_CMPX_F_F16 - - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_LT_F16::Inst_VOPC__V_CMPX_LT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_LT_F16 - - Inst_VOPC__V_CMPX_LT_F16::~Inst_VOPC__V_CMPX_LT_F16() - { - } // ~Inst_VOPC__V_CMPX_LT_F16 - - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_EQ_F16::Inst_VOPC__V_CMPX_EQ_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_EQ_F16 - - Inst_VOPC__V_CMPX_EQ_F16::~Inst_VOPC__V_CMPX_EQ_F16() - { - } // ~Inst_VOPC__V_CMPX_EQ_F16 - - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_LE_F16::Inst_VOPC__V_CMPX_LE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_LE_F16 - - Inst_VOPC__V_CMPX_LE_F16::~Inst_VOPC__V_CMPX_LE_F16() - { - } // ~Inst_VOPC__V_CMPX_LE_F16 - - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_GT_F16::Inst_VOPC__V_CMPX_GT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_GT_F16 - - Inst_VOPC__V_CMPX_GT_F16::~Inst_VOPC__V_CMPX_GT_F16() - { - } // ~Inst_VOPC__V_CMPX_GT_F16 - - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_LG_F16::Inst_VOPC__V_CMPX_LG_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lg_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_LG_F16 - - Inst_VOPC__V_CMPX_LG_F16::~Inst_VOPC__V_CMPX_LG_F16() - { - } // ~Inst_VOPC__V_CMPX_LG_F16 - - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_GE_F16::Inst_VOPC__V_CMPX_GE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_GE_F16 - - Inst_VOPC__V_CMPX_GE_F16::~Inst_VOPC__V_CMPX_GE_F16() - { - } // ~Inst_VOPC__V_CMPX_GE_F16 - - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_O_F16::Inst_VOPC__V_CMPX_O_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_o_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_O_F16 - - Inst_VOPC__V_CMPX_O_F16::~Inst_VOPC__V_CMPX_O_F16() - { - } // ~Inst_VOPC__V_CMPX_O_F16 - - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOPC__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_U_F16::Inst_VOPC__V_CMPX_U_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_u_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_U_F16 - - Inst_VOPC__V_CMPX_U_F16::~Inst_VOPC__V_CMPX_U_F16() - { - } // ~Inst_VOPC__V_CMPX_U_F16 - - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOPC__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_NGE_F16::Inst_VOPC__V_CMPX_NGE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nge_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_NGE_F16 - - Inst_VOPC__V_CMPX_NGE_F16::~Inst_VOPC__V_CMPX_NGE_F16() - { - } // ~Inst_VOPC__V_CMPX_NGE_F16 - - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_NLG_F16::Inst_VOPC__V_CMPX_NLG_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlg_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_NLG_F16 - - Inst_VOPC__V_CMPX_NLG_F16::~Inst_VOPC__V_CMPX_NLG_F16() - { - } // ~Inst_VOPC__V_CMPX_NLG_F16 - - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_NGT_F16::Inst_VOPC__V_CMPX_NGT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ngt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_NGT_F16 - - Inst_VOPC__V_CMPX_NGT_F16::~Inst_VOPC__V_CMPX_NGT_F16() - { - } // ~Inst_VOPC__V_CMPX_NGT_F16 - - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_NLE_F16::Inst_VOPC__V_CMPX_NLE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nle_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_NLE_F16 - - Inst_VOPC__V_CMPX_NLE_F16::~Inst_VOPC__V_CMPX_NLE_F16() - { - } // ~Inst_VOPC__V_CMPX_NLE_F16 - - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_NEQ_F16::Inst_VOPC__V_CMPX_NEQ_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_neq_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_NEQ_F16 - - Inst_VOPC__V_CMPX_NEQ_F16::~Inst_VOPC__V_CMPX_NEQ_F16() - { - } // ~Inst_VOPC__V_CMPX_NEQ_F16 - - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_NLT_F16::Inst_VOPC__V_CMPX_NLT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_NLT_F16 - - Inst_VOPC__V_CMPX_NLT_F16::~Inst_VOPC__V_CMPX_NLT_F16() - { - } // ~Inst_VOPC__V_CMPX_NLT_F16 - - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMPX_TRU_F16::Inst_VOPC__V_CMPX_TRU_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_tru_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMPX_TRU_F16 - - Inst_VOPC__V_CMPX_TRU_F16::~Inst_VOPC__V_CMPX_TRU_F16() - { - } // ~Inst_VOPC__V_CMPX_TRU_F16 - - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOPC__V_CMP_F_F32::Inst_VOPC__V_CMP_F_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_F_F32 - - Inst_VOPC__V_CMP_F_F32::~Inst_VOPC__V_CMP_F_F32() - { - } // ~Inst_VOPC__V_CMP_F_F32 - - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_LT_F32::Inst_VOPC__V_CMP_LT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_LT_F32 - - Inst_VOPC__V_CMP_LT_F32::~Inst_VOPC__V_CMP_LT_F32() - { - } // ~Inst_VOPC__V_CMP_LT_F32 - - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_EQ_F32::Inst_VOPC__V_CMP_EQ_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_EQ_F32 - - Inst_VOPC__V_CMP_EQ_F32::~Inst_VOPC__V_CMP_EQ_F32() - { - } // ~Inst_VOPC__V_CMP_EQ_F32 - - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_LE_F32::Inst_VOPC__V_CMP_LE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_LE_F32 - - Inst_VOPC__V_CMP_LE_F32::~Inst_VOPC__V_CMP_LE_F32() - { - } // ~Inst_VOPC__V_CMP_LE_F32 - - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_GT_F32::Inst_VOPC__V_CMP_GT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_GT_F32 - - Inst_VOPC__V_CMP_GT_F32::~Inst_VOPC__V_CMP_GT_F32() - { - } // ~Inst_VOPC__V_CMP_GT_F32 - - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_LG_F32::Inst_VOPC__V_CMP_LG_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lg_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_LG_F32 - - Inst_VOPC__V_CMP_LG_F32::~Inst_VOPC__V_CMP_LG_F32() - { - } // ~Inst_VOPC__V_CMP_LG_F32 - - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_GE_F32::Inst_VOPC__V_CMP_GE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_GE_F32 - - Inst_VOPC__V_CMP_GE_F32::~Inst_VOPC__V_CMP_GE_F32() - { - } // ~Inst_VOPC__V_CMP_GE_F32 - - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_O_F32::Inst_VOPC__V_CMP_O_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_o_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_O_F32 - - Inst_VOPC__V_CMP_O_F32::~Inst_VOPC__V_CMP_O_F32() - { - } // ~Inst_VOPC__V_CMP_O_F32 - - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_U_F32::Inst_VOPC__V_CMP_U_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_u_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_U_F32 - - Inst_VOPC__V_CMP_U_F32::~Inst_VOPC__V_CMP_U_F32() - { - } // ~Inst_VOPC__V_CMP_U_F32 - - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_NGE_F32::Inst_VOPC__V_CMP_NGE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nge_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NGE_F32 - - Inst_VOPC__V_CMP_NGE_F32::~Inst_VOPC__V_CMP_NGE_F32() - { - } // ~Inst_VOPC__V_CMP_NGE_F32 - - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_NLG_F32::Inst_VOPC__V_CMP_NLG_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlg_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NLG_F32 - - Inst_VOPC__V_CMP_NLG_F32::~Inst_VOPC__V_CMP_NLG_F32() - { - } // ~Inst_VOPC__V_CMP_NLG_F32 - - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_NGT_F32::Inst_VOPC__V_CMP_NGT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ngt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NGT_F32 - - Inst_VOPC__V_CMP_NGT_F32::~Inst_VOPC__V_CMP_NGT_F32() - { - } // ~Inst_VOPC__V_CMP_NGT_F32 - - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_NLE_F32::Inst_VOPC__V_CMP_NLE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nle_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NLE_F32 - - Inst_VOPC__V_CMP_NLE_F32::~Inst_VOPC__V_CMP_NLE_F32() - { - } // ~Inst_VOPC__V_CMP_NLE_F32 - - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_NEQ_F32::Inst_VOPC__V_CMP_NEQ_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_neq_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NEQ_F32 - - Inst_VOPC__V_CMP_NEQ_F32::~Inst_VOPC__V_CMP_NEQ_F32() - { - } // ~Inst_VOPC__V_CMP_NEQ_F32 - - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_NLT_F32::Inst_VOPC__V_CMP_NLT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NLT_F32 - - Inst_VOPC__V_CMP_NLT_F32::~Inst_VOPC__V_CMP_NLT_F32() - { - } // ~Inst_VOPC__V_CMP_NLT_F32 - - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_TRU_F32::Inst_VOPC__V_CMP_TRU_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_tru_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_TRU_F32 - - Inst_VOPC__V_CMP_TRU_F32::~Inst_VOPC__V_CMP_TRU_F32() - { - } // ~Inst_VOPC__V_CMP_TRU_F32 - - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMPX_F_F32::Inst_VOPC__V_CMPX_F_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_F_F32 - - Inst_VOPC__V_CMPX_F_F32::~Inst_VOPC__V_CMPX_F_F32() - { - } // ~Inst_VOPC__V_CMPX_F_F32 - - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_LT_F32::Inst_VOPC__V_CMPX_LT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_LT_F32 - - Inst_VOPC__V_CMPX_LT_F32::~Inst_VOPC__V_CMPX_LT_F32() - { - } // ~Inst_VOPC__V_CMPX_LT_F32 - - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_EQ_F32::Inst_VOPC__V_CMPX_EQ_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_EQ_F32 - - Inst_VOPC__V_CMPX_EQ_F32::~Inst_VOPC__V_CMPX_EQ_F32() - { - } // ~Inst_VOPC__V_CMPX_EQ_F32 - - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_LE_F32::Inst_VOPC__V_CMPX_LE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_LE_F32 - - Inst_VOPC__V_CMPX_LE_F32::~Inst_VOPC__V_CMPX_LE_F32() - { - } // ~Inst_VOPC__V_CMPX_LE_F32 - - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_GT_F32::Inst_VOPC__V_CMPX_GT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_GT_F32 - - Inst_VOPC__V_CMPX_GT_F32::~Inst_VOPC__V_CMPX_GT_F32() - { - } // ~Inst_VOPC__V_CMPX_GT_F32 - - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_LG_F32::Inst_VOPC__V_CMPX_LG_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lg_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_LG_F32 - - Inst_VOPC__V_CMPX_LG_F32::~Inst_VOPC__V_CMPX_LG_F32() - { - } // ~Inst_VOPC__V_CMPX_LG_F32 - - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 
1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_GE_F32::Inst_VOPC__V_CMPX_GE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_GE_F32 - - Inst_VOPC__V_CMPX_GE_F32::~Inst_VOPC__V_CMPX_GE_F32() - { - } // ~Inst_VOPC__V_CMPX_GE_F32 - - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_O_F32::Inst_VOPC__V_CMPX_O_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_o_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_O_F32 - - Inst_VOPC__V_CMPX_O_F32::~Inst_VOPC__V_CMPX_O_F32() - { - } // ~Inst_VOPC__V_CMPX_O_F32 - - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOPC__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 
1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_U_F32::Inst_VOPC__V_CMPX_U_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_u_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_U_F32 - - Inst_VOPC__V_CMPX_U_F32::~Inst_VOPC__V_CMPX_U_F32() - { - } // ~Inst_VOPC__V_CMPX_U_F32 - - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOPC__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_NGE_F32::Inst_VOPC__V_CMPX_NGE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nge_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_NGE_F32 - - Inst_VOPC__V_CMPX_NGE_F32::~Inst_VOPC__V_CMPX_NGE_F32() - { - } // ~Inst_VOPC__V_CMPX_NGE_F32 - - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_NLG_F32::Inst_VOPC__V_CMPX_NLG_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlg_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_NLG_F32 - - Inst_VOPC__V_CMPX_NLG_F32::~Inst_VOPC__V_CMPX_NLG_F32() - { - } // ~Inst_VOPC__V_CMPX_NLG_F32 - - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_NGT_F32::Inst_VOPC__V_CMPX_NGT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ngt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_NGT_F32 - - Inst_VOPC__V_CMPX_NGT_F32::~Inst_VOPC__V_CMPX_NGT_F32() - { - } // ~Inst_VOPC__V_CMPX_NGT_F32 - - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 
1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_NLE_F32::Inst_VOPC__V_CMPX_NLE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nle_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_NLE_F32 - - Inst_VOPC__V_CMPX_NLE_F32::~Inst_VOPC__V_CMPX_NLE_F32() - { - } // ~Inst_VOPC__V_CMPX_NLE_F32 - - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_NEQ_F32::Inst_VOPC__V_CMPX_NEQ_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_neq_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_NEQ_F32 - - Inst_VOPC__V_CMPX_NEQ_F32::~Inst_VOPC__V_CMPX_NEQ_F32() - { - } // ~Inst_VOPC__V_CMPX_NEQ_F32 - - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] == src1[lane]) ? 
1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMPX_NLT_F32::Inst_VOPC__V_CMPX_NLT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_NLT_F32 - - Inst_VOPC__V_CMPX_NLT_F32::~Inst_VOPC__V_CMPX_NLT_F32() - { - } // ~Inst_VOPC__V_CMPX_NLT_F32 - - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_TRU_F32::Inst_VOPC__V_CMPX_TRU_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_tru_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMPX_TRU_F32 - - Inst_VOPC__V_CMPX_TRU_F32::~Inst_VOPC__V_CMPX_TRU_F32() - { - } // ~Inst_VOPC__V_CMPX_TRU_F32 - - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMP_F_F64::Inst_VOPC__V_CMP_F_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_F_F64 - - Inst_VOPC__V_CMP_F_F64::~Inst_VOPC__V_CMP_F_F64() - { - } // ~Inst_VOPC__V_CMP_F_F64 - - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_LT_F64::Inst_VOPC__V_CMP_LT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_LT_F64 - - Inst_VOPC__V_CMP_LT_F64::~Inst_VOPC__V_CMP_LT_F64() - { - } // ~Inst_VOPC__V_CMP_LT_F64 - - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_EQ_F64::Inst_VOPC__V_CMP_EQ_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_EQ_F64 - - Inst_VOPC__V_CMP_EQ_F64::~Inst_VOPC__V_CMP_EQ_F64() - { - } // ~Inst_VOPC__V_CMP_EQ_F64 - - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_LE_F64::Inst_VOPC__V_CMP_LE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_LE_F64 - - Inst_VOPC__V_CMP_LE_F64::~Inst_VOPC__V_CMP_LE_F64() - { - } // ~Inst_VOPC__V_CMP_LE_F64 - - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_GT_F64::Inst_VOPC__V_CMP_GT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_GT_F64 - - Inst_VOPC__V_CMP_GT_F64::~Inst_VOPC__V_CMP_GT_F64() - { - } // ~Inst_VOPC__V_CMP_GT_F64 - - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_LG_F64::Inst_VOPC__V_CMP_LG_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lg_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_LG_F64 - - Inst_VOPC__V_CMP_LG_F64::~Inst_VOPC__V_CMP_LG_F64() - { - } // ~Inst_VOPC__V_CMP_LG_F64 - - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_GE_F64::Inst_VOPC__V_CMP_GE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_GE_F64 - - Inst_VOPC__V_CMP_GE_F64::~Inst_VOPC__V_CMP_GE_F64() - { - } // ~Inst_VOPC__V_CMP_GE_F64 - - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_O_F64::Inst_VOPC__V_CMP_O_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_o_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_O_F64 - - Inst_VOPC__V_CMP_O_F64::~Inst_VOPC__V_CMP_O_F64() - { - } // ~Inst_VOPC__V_CMP_O_F64 - - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_U_F64::Inst_VOPC__V_CMP_U_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_u_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_U_F64 - - Inst_VOPC__V_CMP_U_F64::~Inst_VOPC__V_CMP_U_F64() - { - } // ~Inst_VOPC__V_CMP_U_F64 - - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_NGE_F64::Inst_VOPC__V_CMP_NGE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nge_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NGE_F64 - - Inst_VOPC__V_CMP_NGE_F64::~Inst_VOPC__V_CMP_NGE_F64() - { - } // ~Inst_VOPC__V_CMP_NGE_F64 - - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_NLG_F64::Inst_VOPC__V_CMP_NLG_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlg_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NLG_F64 - - Inst_VOPC__V_CMP_NLG_F64::~Inst_VOPC__V_CMP_NLG_F64() - { - } // ~Inst_VOPC__V_CMP_NLG_F64 - - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_NGT_F64::Inst_VOPC__V_CMP_NGT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ngt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NGT_F64 - - Inst_VOPC__V_CMP_NGT_F64::~Inst_VOPC__V_CMP_NGT_F64() - { - } // ~Inst_VOPC__V_CMP_NGT_F64 - - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_NLE_F64::Inst_VOPC__V_CMP_NLE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nle_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NLE_F64 - - Inst_VOPC__V_CMP_NLE_F64::~Inst_VOPC__V_CMP_NLE_F64() - { - } // ~Inst_VOPC__V_CMP_NLE_F64 - - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_NEQ_F64::Inst_VOPC__V_CMP_NEQ_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_neq_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NEQ_F64 - - Inst_VOPC__V_CMP_NEQ_F64::~Inst_VOPC__V_CMP_NEQ_F64() - { - } // ~Inst_VOPC__V_CMP_NEQ_F64 - - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_NLT_F64::Inst_VOPC__V_CMP_NLT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NLT_F64 - - Inst_VOPC__V_CMP_NLT_F64::~Inst_VOPC__V_CMP_NLT_F64() - { - } // ~Inst_VOPC__V_CMP_NLT_F64 - - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_TRU_F64::Inst_VOPC__V_CMP_TRU_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_tru_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_TRU_F64 - - Inst_VOPC__V_CMP_TRU_F64::~Inst_VOPC__V_CMP_TRU_F64() - { - } // ~Inst_VOPC__V_CMP_TRU_F64 - - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMPX_F_F64::Inst_VOPC__V_CMPX_F_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_F_F64 - - Inst_VOPC__V_CMPX_F_F64::~Inst_VOPC__V_CMPX_F_F64() - { - } // ~Inst_VOPC__V_CMPX_F_F64 - - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_LT_F64::Inst_VOPC__V_CMPX_LT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_LT_F64 - - Inst_VOPC__V_CMPX_LT_F64::~Inst_VOPC__V_CMPX_LT_F64() - { - } // ~Inst_VOPC__V_CMPX_LT_F64 - - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_EQ_F64::Inst_VOPC__V_CMPX_EQ_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_EQ_F64 - - Inst_VOPC__V_CMPX_EQ_F64::~Inst_VOPC__V_CMPX_EQ_F64() - { - } // ~Inst_VOPC__V_CMPX_EQ_F64 - - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } - - Inst_VOPC__V_CMPX_LE_F64::Inst_VOPC__V_CMPX_LE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_LE_F64 - - Inst_VOPC__V_CMPX_LE_F64::~Inst_VOPC__V_CMPX_LE_F64() - { - } // ~Inst_VOPC__V_CMPX_LE_F64 - - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_GT_F64::Inst_VOPC__V_CMPX_GT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_GT_F64 - - Inst_VOPC__V_CMPX_GT_F64::~Inst_VOPC__V_CMPX_GT_F64() - { - } // ~Inst_VOPC__V_CMPX_GT_F64 - - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_LG_F64::Inst_VOPC__V_CMPX_LG_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lg_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_LG_F64 - - Inst_VOPC__V_CMPX_LG_F64::~Inst_VOPC__V_CMPX_LG_F64() - { - } // ~Inst_VOPC__V_CMPX_LG_F64 - - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_GE_F64::Inst_VOPC__V_CMPX_GE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_GE_F64 - - Inst_VOPC__V_CMPX_GE_F64::~Inst_VOPC__V_CMPX_GE_F64() - { - } // ~Inst_VOPC__V_CMPX_GE_F64 - - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_O_F64::Inst_VOPC__V_CMPX_O_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_o_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_O_F64 - - Inst_VOPC__V_CMPX_O_F64::~Inst_VOPC__V_CMPX_O_F64() - { - } // ~Inst_VOPC__V_CMPX_O_F64 - - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOPC__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_U_F64::Inst_VOPC__V_CMPX_U_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_u_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_U_F64 - - Inst_VOPC__V_CMPX_U_F64::~Inst_VOPC__V_CMPX_U_F64() - { - } // ~Inst_VOPC__V_CMPX_U_F64 - - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOPC__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_NGE_F64::Inst_VOPC__V_CMPX_NGE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nge_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_NGE_F64 - - Inst_VOPC__V_CMPX_NGE_F64::~Inst_VOPC__V_CMPX_NGE_F64() - { - } // ~Inst_VOPC__V_CMPX_NGE_F64 - - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_NLG_F64::Inst_VOPC__V_CMPX_NLG_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlg_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_NLG_F64 - - Inst_VOPC__V_CMPX_NLG_F64::~Inst_VOPC__V_CMPX_NLG_F64() - { - } // ~Inst_VOPC__V_CMPX_NLG_F64 - - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_NGT_F64::Inst_VOPC__V_CMPX_NGT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ngt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_NGT_F64 - - Inst_VOPC__V_CMPX_NGT_F64::~Inst_VOPC__V_CMPX_NGT_F64() - { - } // ~Inst_VOPC__V_CMPX_NGT_F64 - - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_NLE_F64::Inst_VOPC__V_CMPX_NLE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nle_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_NLE_F64 - - Inst_VOPC__V_CMPX_NLE_F64::~Inst_VOPC__V_CMPX_NLE_F64() - { - } // ~Inst_VOPC__V_CMPX_NLE_F64 - - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_NEQ_F64::Inst_VOPC__V_CMPX_NEQ_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_neq_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_NEQ_F64 - - Inst_VOPC__V_CMPX_NEQ_F64::~Inst_VOPC__V_CMPX_NEQ_F64() - { - } // ~Inst_VOPC__V_CMPX_NEQ_F64 - - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_NLT_F64::Inst_VOPC__V_CMPX_NLT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_NLT_F64 - - Inst_VOPC__V_CMPX_NLT_F64::~Inst_VOPC__V_CMPX_NLT_F64() - { - } // ~Inst_VOPC__V_CMPX_NLT_F64 - - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_TRU_F64::Inst_VOPC__V_CMPX_TRU_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_tru_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMPX_TRU_F64 - - Inst_VOPC__V_CMPX_TRU_F64::~Inst_VOPC__V_CMPX_TRU_F64() - { - } // ~Inst_VOPC__V_CMPX_TRU_F64 - - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMP_F_I16::Inst_VOPC__V_CMP_F_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_F_I16 - - Inst_VOPC__V_CMP_F_I16::~Inst_VOPC__V_CMP_F_I16() - { - } // ~Inst_VOPC__V_CMP_F_I16 - - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_LT_I16::Inst_VOPC__V_CMP_LT_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LT_I16 - - Inst_VOPC__V_CMP_LT_I16::~Inst_VOPC__V_CMP_LT_I16() - { - } // ~Inst_VOPC__V_CMP_LT_I16 - - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_EQ_I16::Inst_VOPC__V_CMP_EQ_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_EQ_I16 - - Inst_VOPC__V_CMP_EQ_I16::~Inst_VOPC__V_CMP_EQ_I16() - { - } // ~Inst_VOPC__V_CMP_EQ_I16 - - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_LE_I16::Inst_VOPC__V_CMP_LE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LE_I16 - - Inst_VOPC__V_CMP_LE_I16::~Inst_VOPC__V_CMP_LE_I16() - { - } // ~Inst_VOPC__V_CMP_LE_I16 - - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_GT_I16::Inst_VOPC__V_CMP_GT_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GT_I16 - - Inst_VOPC__V_CMP_GT_I16::~Inst_VOPC__V_CMP_GT_I16() - { - } // ~Inst_VOPC__V_CMP_GT_I16 - - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_NE_I16::Inst_VOPC__V_CMP_NE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ne_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_NE_I16 - - Inst_VOPC__V_CMP_NE_I16::~Inst_VOPC__V_CMP_NE_I16() - { - } // ~Inst_VOPC__V_CMP_NE_I16 - - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_GE_I16::Inst_VOPC__V_CMP_GE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GE_I16 - - Inst_VOPC__V_CMP_GE_I16::~Inst_VOPC__V_CMP_GE_I16() - { - } // ~Inst_VOPC__V_CMP_GE_I16 - - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_T_I16::Inst_VOPC__V_CMP_T_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_t_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_T_I16 - - Inst_VOPC__V_CMP_T_I16::~Inst_VOPC__V_CMP_T_I16() - { - } // ~Inst_VOPC__V_CMP_T_I16 - - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_F_U16::Inst_VOPC__V_CMP_F_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_F_U16 - - Inst_VOPC__V_CMP_F_U16::~Inst_VOPC__V_CMP_F_U16() - { - } // ~Inst_VOPC__V_CMP_F_U16 - - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_LT_U16::Inst_VOPC__V_CMP_LT_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LT_U16 - - Inst_VOPC__V_CMP_LT_U16::~Inst_VOPC__V_CMP_LT_U16() - { - } // ~Inst_VOPC__V_CMP_LT_U16 - - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_EQ_U16::Inst_VOPC__V_CMP_EQ_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_EQ_U16 - - Inst_VOPC__V_CMP_EQ_U16::~Inst_VOPC__V_CMP_EQ_U16() - { - } // ~Inst_VOPC__V_CMP_EQ_U16 - - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_LE_U16::Inst_VOPC__V_CMP_LE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LE_U16 - - Inst_VOPC__V_CMP_LE_U16::~Inst_VOPC__V_CMP_LE_U16() - { - } // ~Inst_VOPC__V_CMP_LE_U16 - - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_GT_U16::Inst_VOPC__V_CMP_GT_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GT_U16 - - Inst_VOPC__V_CMP_GT_U16::~Inst_VOPC__V_CMP_GT_U16() - { - } // ~Inst_VOPC__V_CMP_GT_U16 - - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_NE_U16::Inst_VOPC__V_CMP_NE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ne_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_NE_U16 - - Inst_VOPC__V_CMP_NE_U16::~Inst_VOPC__V_CMP_NE_U16() - { - } // ~Inst_VOPC__V_CMP_NE_U16 - - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_GE_U16::Inst_VOPC__V_CMP_GE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GE_U16 - - Inst_VOPC__V_CMP_GE_U16::~Inst_VOPC__V_CMP_GE_U16() - { - } // ~Inst_VOPC__V_CMP_GE_U16 - - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_T_U16::Inst_VOPC__V_CMP_T_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_t_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_T_U16 - - Inst_VOPC__V_CMP_T_U16::~Inst_VOPC__V_CMP_T_U16() - { - } // ~Inst_VOPC__V_CMP_T_U16 - - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMPX_F_I16::Inst_VOPC__V_CMPX_F_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_F_I16 - - Inst_VOPC__V_CMPX_F_I16::~Inst_VOPC__V_CMPX_F_I16() - { - } // ~Inst_VOPC__V_CMPX_F_I16 - - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_LT_I16::Inst_VOPC__V_CMPX_LT_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_LT_I16 - - Inst_VOPC__V_CMPX_LT_I16::~Inst_VOPC__V_CMPX_LT_I16() - { - } // ~Inst_VOPC__V_CMPX_LT_I16 - - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_EQ_I16::Inst_VOPC__V_CMPX_EQ_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_EQ_I16 - - Inst_VOPC__V_CMPX_EQ_I16::~Inst_VOPC__V_CMPX_EQ_I16() - { - } // ~Inst_VOPC__V_CMPX_EQ_I16 - - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_LE_I16::Inst_VOPC__V_CMPX_LE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_LE_I16 - - Inst_VOPC__V_CMPX_LE_I16::~Inst_VOPC__V_CMPX_LE_I16() - { - } // ~Inst_VOPC__V_CMPX_LE_I16 - - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_GT_I16::Inst_VOPC__V_CMPX_GT_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_GT_I16 - - Inst_VOPC__V_CMPX_GT_I16::~Inst_VOPC__V_CMPX_GT_I16() - { - } // ~Inst_VOPC__V_CMPX_GT_I16 - - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_NE_I16::Inst_VOPC__V_CMPX_NE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_NE_I16 - - Inst_VOPC__V_CMPX_NE_I16::~Inst_VOPC__V_CMPX_NE_I16() - { - } // ~Inst_VOPC__V_CMPX_NE_I16 - - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_GE_I16::Inst_VOPC__V_CMPX_GE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_GE_I16 - - Inst_VOPC__V_CMPX_GE_I16::~Inst_VOPC__V_CMPX_GE_I16() - { - } // ~Inst_VOPC__V_CMPX_GE_I16 - - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_T_I16::Inst_VOPC__V_CMPX_T_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_t_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_T_I16 - - Inst_VOPC__V_CMPX_T_I16::~Inst_VOPC__V_CMPX_T_I16() - { - } // ~Inst_VOPC__V_CMPX_T_I16 - - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_F_U16::Inst_VOPC__V_CMPX_F_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_F_U16 - - Inst_VOPC__V_CMPX_F_U16::~Inst_VOPC__V_CMPX_F_U16() - { - } // ~Inst_VOPC__V_CMPX_F_U16 - - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_LT_U16::Inst_VOPC__V_CMPX_LT_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_LT_U16 - - Inst_VOPC__V_CMPX_LT_U16::~Inst_VOPC__V_CMPX_LT_U16() - { - } // ~Inst_VOPC__V_CMPX_LT_U16 - - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_EQ_U16::Inst_VOPC__V_CMPX_EQ_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_EQ_U16 - - Inst_VOPC__V_CMPX_EQ_U16::~Inst_VOPC__V_CMPX_EQ_U16() - { - } // ~Inst_VOPC__V_CMPX_EQ_U16 - - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_LE_U16::Inst_VOPC__V_CMPX_LE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_LE_U16 - - Inst_VOPC__V_CMPX_LE_U16::~Inst_VOPC__V_CMPX_LE_U16() - { - } // ~Inst_VOPC__V_CMPX_LE_U16 - - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_GT_U16::Inst_VOPC__V_CMPX_GT_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_GT_U16 - - Inst_VOPC__V_CMPX_GT_U16::~Inst_VOPC__V_CMPX_GT_U16() - { - } // ~Inst_VOPC__V_CMPX_GT_U16 - - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_NE_U16::Inst_VOPC__V_CMPX_NE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_NE_U16 - - Inst_VOPC__V_CMPX_NE_U16::~Inst_VOPC__V_CMPX_NE_U16() - { - } // ~Inst_VOPC__V_CMPX_NE_U16 - - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
    // VOPC vector compares: each instruction compares SRC0 against VSRC1
    // lane-by-lane and writes one result bit per active lane into VCC.
    // The v_cmpx_* variants additionally copy the new mask into EXEC.
    // Lanes inactive in the current EXEC mask do not update their VCC bit
    // (presumably the operand storage starts zeroed -- TODO confirm
    // against the ScalarOperand implementation).

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        // readSrc() (vs. read()) is used for SRC0 only -- presumably it
        // handles the wider SRC0 operand encodings (SGPR/inline constant);
        // confirm against ConstVecOperand.
        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        // CMPX: the freshly computed compare mask also becomes EXEC.
        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_GE_U16::Inst_VOPC__V_CMPX_GE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_U16

    Inst_VOPC__V_CMPX_GE_U16::~Inst_VOPC__V_CMPX_GE_U16()
    {
    } // ~Inst_VOPC__V_CMPX_GE_U16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_T_U16::Inst_VOPC__V_CMPX_T_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_U16

    Inst_VOPC__V_CMPX_T_U16::~Inst_VOPC__V_CMPX_T_U16()
    {
    } // ~Inst_VOPC__V_CMPX_T_U16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        // "true" compare: unconditionally set the bit for active lanes.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMP_F_I32::Inst_VOPC__V_CMP_F_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_I32

    Inst_VOPC__V_CMP_F_I32::~Inst_VOPC__V_CMP_F_I32()
    {
    } // ~Inst_VOPC__V_CMP_F_I32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        // "false" compare: unconditionally clear the bit for active lanes.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_LT_I32::Inst_VOPC__V_CMP_LT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_I32

    Inst_VOPC__V_CMP_LT_I32::~Inst_VOPC__V_CMP_LT_I32()
    {
    } // ~Inst_VOPC__V_CMP_LT_I32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_EQ_I32::Inst_VOPC__V_CMP_EQ_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_I32

    Inst_VOPC__V_CMP_EQ_I32::~Inst_VOPC__V_CMP_EQ_I32()
    {
    } // ~Inst_VOPC__V_CMP_EQ_I32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_LE_I32::Inst_VOPC__V_CMP_LE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_I32

    Inst_VOPC__V_CMP_LE_I32::~Inst_VOPC__V_CMP_LE_I32()
    {
    } // ~Inst_VOPC__V_CMP_LE_I32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_GT_I32::Inst_VOPC__V_CMP_GT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_I32

    Inst_VOPC__V_CMP_GT_I32::~Inst_VOPC__V_CMP_GT_I32()
    {
    } // ~Inst_VOPC__V_CMP_GT_I32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    // Signed 32-bit VOPC compares (v_cmp_gt/ne/ge/t_i32) and the first
    // unsigned 32-bit compares (v_cmp_f/lt_u32).  One VCC result bit per
    // active lane; plain v_cmp_* variants leave EXEC untouched.
    void
    Inst_VOPC__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_NE_I32::Inst_VOPC__V_CMP_NE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_I32

    Inst_VOPC__V_CMP_NE_I32::~Inst_VOPC__V_CMP_NE_I32()
    {
    } // ~Inst_VOPC__V_CMP_NE_I32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_GE_I32::Inst_VOPC__V_CMP_GE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_I32

    Inst_VOPC__V_CMP_GE_I32::~Inst_VOPC__V_CMP_GE_I32()
    {
    } // ~Inst_VOPC__V_CMP_GE_I32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_T_I32::Inst_VOPC__V_CMP_T_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_I32

    Inst_VOPC__V_CMP_T_I32::~Inst_VOPC__V_CMP_T_I32()
    {
    } // ~Inst_VOPC__V_CMP_T_I32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        // "true" compare: no sources, active lanes get a 1 bit.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_F_U32::Inst_VOPC__V_CMP_F_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_U32

    Inst_VOPC__V_CMP_F_U32::~Inst_VOPC__V_CMP_F_U32()
    {
    } // ~Inst_VOPC__V_CMP_F_U32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        // "false" compare: no sources, active lanes get a 0 bit.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_LT_U32::Inst_VOPC__V_CMP_LT_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_U32

    Inst_VOPC__V_CMP_LT_U32::~Inst_VOPC__V_CMP_LT_U32()
    {
    } // ~Inst_VOPC__V_CMP_LT_U32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    // Unsigned 32-bit VOPC compares (v_cmp_lt/eq/le/gt_u32).  Each writes
    // one VCC bit per active lane; EXEC is not modified by plain v_cmp_*.
    void
    Inst_VOPC__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_EQ_U32::Inst_VOPC__V_CMP_EQ_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_U32

    Inst_VOPC__V_CMP_EQ_U32::~Inst_VOPC__V_CMP_EQ_U32()
    {
    } // ~Inst_VOPC__V_CMP_EQ_U32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_LE_U32::Inst_VOPC__V_CMP_LE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_U32

    Inst_VOPC__V_CMP_LE_U32::~Inst_VOPC__V_CMP_LE_U32()
    {
    } // ~Inst_VOPC__V_CMP_LE_U32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_GT_U32::Inst_VOPC__V_CMP_GT_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_U32

    Inst_VOPC__V_CMP_GT_U32::~Inst_VOPC__V_CMP_GT_U32()
    {
    } // ~Inst_VOPC__V_CMP_GT_U32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_NE_U32::Inst_VOPC__V_CMP_NE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_U32

    Inst_VOPC__V_CMP_NE_U32::~Inst_VOPC__V_CMP_NE_U32()
    {
    } // ~Inst_VOPC__V_CMP_NE_U32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    // Remaining unsigned 32-bit compares plus the v_cmpx_*_i32 family.
    // The cmpx variants write the computed per-lane mask to both VCC and
    // EXEC; the plain ones write only VCC.
    void
    Inst_VOPC__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_GE_U32::Inst_VOPC__V_CMP_GE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_U32

    Inst_VOPC__V_CMP_GE_U32::~Inst_VOPC__V_CMP_GE_U32()
    {
    } // ~Inst_VOPC__V_CMP_GE_U32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_T_U32::Inst_VOPC__V_CMP_T_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_U32

    Inst_VOPC__V_CMP_T_U32::~Inst_VOPC__V_CMP_T_U32()
    {
    } // ~Inst_VOPC__V_CMP_T_U32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMPX_F_I32::Inst_VOPC__V_CMPX_F_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_I32

    Inst_VOPC__V_CMPX_F_I32::~Inst_VOPC__V_CMPX_F_I32()
    {
    } // ~Inst_VOPC__V_CMPX_F_I32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        // CMPX: the compare mask also becomes EXEC.
        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_LT_I32::Inst_VOPC__V_CMPX_LT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_I32

    Inst_VOPC__V_CMPX_LT_I32::~Inst_VOPC__V_CMPX_LT_I32()
    {
    } // ~Inst_VOPC__V_CMPX_LT_I32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_EQ_I32::Inst_VOPC__V_CMPX_EQ_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_I32

    Inst_VOPC__V_CMPX_EQ_I32::~Inst_VOPC__V_CMPX_EQ_I32()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_I32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_LE_I32::Inst_VOPC__V_CMPX_LE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_I32

    Inst_VOPC__V_CMPX_LE_I32::~Inst_VOPC__V_CMPX_LE_I32()
    {
    } // ~Inst_VOPC__V_CMPX_LE_I32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_GT_I32::Inst_VOPC__V_CMPX_GT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_I32

    Inst_VOPC__V_CMPX_GT_I32::~Inst_VOPC__V_CMPX_GT_I32()
    {
    } // ~Inst_VOPC__V_CMPX_GT_I32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_NE_I32::Inst_VOPC__V_CMPX_NE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_I32

    Inst_VOPC__V_CMPX_NE_I32::~Inst_VOPC__V_CMPX_NE_I32()
    {
    } // ~Inst_VOPC__V_CMPX_NE_I32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_GE_I32::Inst_VOPC__V_CMPX_GE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_I32

    Inst_VOPC__V_CMPX_GE_I32::~Inst_VOPC__V_CMPX_GE_I32()
    {
    } // ~Inst_VOPC__V_CMPX_GE_I32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_T_I32::Inst_VOPC__V_CMPX_T_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_I32

    Inst_VOPC__V_CMPX_T_I32::~Inst_VOPC__V_CMPX_T_I32()
    {
    } // ~Inst_VOPC__V_CMPX_T_I32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_F_U32::Inst_VOPC__V_CMPX_F_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_U32

    Inst_VOPC__V_CMPX_F_U32::~Inst_VOPC__V_CMPX_F_U32()
    {
    } // ~Inst_VOPC__V_CMPX_F_U32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    // v_cmpx_*_u32 family and the start of the 64-bit compares.  Each
    // cmpx instruction writes the per-lane compare mask to VCC and then
    // overwrites EXEC with the same mask.
    void
    Inst_VOPC__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_LT_U32::Inst_VOPC__V_CMPX_LT_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_U32

    Inst_VOPC__V_CMPX_LT_U32::~Inst_VOPC__V_CMPX_LT_U32()
    {
    } // ~Inst_VOPC__V_CMPX_LT_U32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_EQ_U32::Inst_VOPC__V_CMPX_EQ_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_U32

    Inst_VOPC__V_CMPX_EQ_U32::~Inst_VOPC__V_CMPX_EQ_U32()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_U32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_LE_U32::Inst_VOPC__V_CMPX_LE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_U32

    Inst_VOPC__V_CMPX_LE_U32::~Inst_VOPC__V_CMPX_LE_U32()
    {
    } // ~Inst_VOPC__V_CMPX_LE_U32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_GT_U32::Inst_VOPC__V_CMPX_GT_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_U32

    Inst_VOPC__V_CMPX_GT_U32::~Inst_VOPC__V_CMPX_GT_U32()
    {
    } // ~Inst_VOPC__V_CMPX_GT_U32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_NE_U32::Inst_VOPC__V_CMPX_NE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_U32

    Inst_VOPC__V_CMPX_NE_U32::~Inst_VOPC__V_CMPX_NE_U32()
    {
    } // ~Inst_VOPC__V_CMPX_NE_U32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_GE_U32::Inst_VOPC__V_CMPX_GE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_U32

    Inst_VOPC__V_CMPX_GE_U32::~Inst_VOPC__V_CMPX_GE_U32()
    {
    } // ~Inst_VOPC__V_CMPX_GE_U32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMPX_T_U32::Inst_VOPC__V_CMPX_T_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_U32

    Inst_VOPC__V_CMPX_T_U32::~Inst_VOPC__V_CMPX_T_U32()
    {
    } // ~Inst_VOPC__V_CMPX_T_U32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VOPC__V_CMP_F_I64::Inst_VOPC__V_CMP_F_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_I64

    Inst_VOPC__V_CMP_F_I64::~Inst_VOPC__V_CMP_F_I64()
    {
    } // ~Inst_VOPC__V_CMP_F_I64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_LT_I64::Inst_VOPC__V_CMP_LT_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_I64

    Inst_VOPC__V_CMP_LT_I64::~Inst_VOPC__V_CMP_LT_I64()
    {
    } // ~Inst_VOPC__V_CMP_LT_I64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    // Signed 64-bit VOPC compares (v_cmp_lt/eq/le/gt/ne/ge_i64).  Each
    // lane compares a 64-bit source pair and writes one VCC bit; plain
    // v_cmp_* variants do not touch EXEC.
    void
    Inst_VOPC__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_EQ_I64::Inst_VOPC__V_CMP_EQ_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_I64

    Inst_VOPC__V_CMP_EQ_I64::~Inst_VOPC__V_CMP_EQ_I64()
    {
    } // ~Inst_VOPC__V_CMP_EQ_I64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_LE_I64::Inst_VOPC__V_CMP_LE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_I64

    Inst_VOPC__V_CMP_LE_I64::~Inst_VOPC__V_CMP_LE_I64()
    {
    } // ~Inst_VOPC__V_CMP_LE_I64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_GT_I64::Inst_VOPC__V_CMP_GT_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_I64

    Inst_VOPC__V_CMP_GT_I64::~Inst_VOPC__V_CMP_GT_I64()
    {
    } // ~Inst_VOPC__V_CMP_GT_I64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_NE_I64::Inst_VOPC__V_CMP_NE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_I64

    Inst_VOPC__V_CMP_NE_I64::~Inst_VOPC__V_CMP_NE_I64()
    {
    } // ~Inst_VOPC__V_CMP_NE_I64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_GE_I64::Inst_VOPC__V_CMP_GE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_I64

    Inst_VOPC__V_CMP_GE_I64::~Inst_VOPC__V_CMP_GE_I64()
    {
    } // ~Inst_VOPC__V_CMP_GE_I64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_T_I64::Inst_VOPC__V_CMP_T_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_I64

    Inst_VOPC__V_CMP_T_I64::~Inst_VOPC__V_CMP_T_I64()
    {
    } // ~Inst_VOPC__V_CMP_T_I64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    // v_cmp_t_i64 and the first unsigned 64-bit compares
    // (v_cmp_f/lt/eq/le_u64).  One VCC result bit per active lane.
    void
    Inst_VOPC__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        // "true" compare: no sources, active lanes get a 1 bit.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_F_U64::Inst_VOPC__V_CMP_F_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_U64

    Inst_VOPC__V_CMP_F_U64::~Inst_VOPC__V_CMP_F_U64()
    {
    } // ~Inst_VOPC__V_CMP_F_U64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        // "false" compare: no sources, active lanes get a 0 bit.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_LT_U64::Inst_VOPC__V_CMP_LT_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_U64

    Inst_VOPC__V_CMP_LT_U64::~Inst_VOPC__V_CMP_LT_U64()
    {
    } // ~Inst_VOPC__V_CMP_LT_U64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_EQ_U64::Inst_VOPC__V_CMP_EQ_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_U64

    Inst_VOPC__V_CMP_EQ_U64::~Inst_VOPC__V_CMP_EQ_U64()
    {
    } // ~Inst_VOPC__V_CMP_EQ_U64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_LE_U64::Inst_VOPC__V_CMP_LE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_U64

    Inst_VOPC__V_CMP_LE_U64::~Inst_VOPC__V_CMP_LE_U64()
    {
    } // ~Inst_VOPC__V_CMP_LE_U64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    }

    Inst_VOPC__V_CMP_GT_U64::Inst_VOPC__V_CMP_GT_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_U64

    Inst_VOPC__V_CMP_GT_U64::~Inst_VOPC__V_CMP_GT_U64()
    {
    } // ~Inst_VOPC__V_CMP_GT_U64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
- void - Inst_VOPC__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_NE_U64::Inst_VOPC__V_CMP_NE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ne_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_NE_U64 - - Inst_VOPC__V_CMP_NE_U64::~Inst_VOPC__V_CMP_NE_U64() - { - } // ~Inst_VOPC__V_CMP_NE_U64 - - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_GE_U64::Inst_VOPC__V_CMP_GE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GE_U64 - - Inst_VOPC__V_CMP_GE_U64::~Inst_VOPC__V_CMP_GE_U64() - { - } // ~Inst_VOPC__V_CMP_GE_U64 - - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMP_T_U64::Inst_VOPC__V_CMP_T_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_t_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_T_U64 - - Inst_VOPC__V_CMP_T_U64::~Inst_VOPC__V_CMP_T_U64() - { - } // ~Inst_VOPC__V_CMP_T_U64 - - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } - - Inst_VOPC__V_CMPX_F_I64::Inst_VOPC__V_CMPX_F_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_F_I64 - - Inst_VOPC__V_CMPX_F_I64::~Inst_VOPC__V_CMPX_F_I64() - { - } // ~Inst_VOPC__V_CMPX_F_I64 - - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_LT_I64::Inst_VOPC__V_CMPX_LT_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_LT_I64 - - Inst_VOPC__V_CMPX_LT_I64::~Inst_VOPC__V_CMPX_LT_I64() - { - } // ~Inst_VOPC__V_CMPX_LT_I64 - - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_EQ_I64::Inst_VOPC__V_CMPX_EQ_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_EQ_I64 - - Inst_VOPC__V_CMPX_EQ_I64::~Inst_VOPC__V_CMPX_EQ_I64() - { - } // ~Inst_VOPC__V_CMPX_EQ_I64 - - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_LE_I64::Inst_VOPC__V_CMPX_LE_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_LE_I64 - - Inst_VOPC__V_CMPX_LE_I64::~Inst_VOPC__V_CMPX_LE_I64() - { - } // ~Inst_VOPC__V_CMPX_LE_I64 - - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_GT_I64::Inst_VOPC__V_CMPX_GT_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_GT_I64 - - Inst_VOPC__V_CMPX_GT_I64::~Inst_VOPC__V_CMPX_GT_I64() - { - } // ~Inst_VOPC__V_CMPX_GT_I64 - - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_NE_I64::Inst_VOPC__V_CMPX_NE_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_NE_I64 - - Inst_VOPC__V_CMPX_NE_I64::~Inst_VOPC__V_CMPX_NE_I64() - { - } // ~Inst_VOPC__V_CMPX_NE_I64 - - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_GE_I64::Inst_VOPC__V_CMPX_GE_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_GE_I64 - - Inst_VOPC__V_CMPX_GE_I64::~Inst_VOPC__V_CMPX_GE_I64() - { - } // ~Inst_VOPC__V_CMPX_GE_I64 - - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_T_I64::Inst_VOPC__V_CMPX_T_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_t_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_T_I64 - - Inst_VOPC__V_CMPX_T_I64::~Inst_VOPC__V_CMPX_T_I64() - { - } // ~Inst_VOPC__V_CMPX_T_I64 - - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_F_U64::Inst_VOPC__V_CMPX_F_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_F_U64 - - Inst_VOPC__V_CMPX_F_U64::~Inst_VOPC__V_CMPX_F_U64() - { - } // ~Inst_VOPC__V_CMPX_F_U64 - - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_LT_U64::Inst_VOPC__V_CMPX_LT_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_LT_U64 - - Inst_VOPC__V_CMPX_LT_U64::~Inst_VOPC__V_CMPX_LT_U64() - { - } // ~Inst_VOPC__V_CMPX_LT_U64 - - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_EQ_U64::Inst_VOPC__V_CMPX_EQ_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_EQ_U64 - - Inst_VOPC__V_CMPX_EQ_U64::~Inst_VOPC__V_CMPX_EQ_U64() - { - } // ~Inst_VOPC__V_CMPX_EQ_U64 - - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_LE_U64::Inst_VOPC__V_CMPX_LE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_LE_U64 - - Inst_VOPC__V_CMPX_LE_U64::~Inst_VOPC__V_CMPX_LE_U64() - { - } // ~Inst_VOPC__V_CMPX_LE_U64 - - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_GT_U64::Inst_VOPC__V_CMPX_GT_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_GT_U64 - - Inst_VOPC__V_CMPX_GT_U64::~Inst_VOPC__V_CMPX_GT_U64() - { - } // ~Inst_VOPC__V_CMPX_GT_U64 - - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_NE_U64::Inst_VOPC__V_CMPX_NE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_NE_U64 - - Inst_VOPC__V_CMPX_NE_U64::~Inst_VOPC__V_CMPX_NE_U64() - { - } // ~Inst_VOPC__V_CMPX_NE_U64 - - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_GE_U64::Inst_VOPC__V_CMPX_GE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_GE_U64 - - Inst_VOPC__V_CMPX_GE_U64::~Inst_VOPC__V_CMPX_GE_U64() - { - } // ~Inst_VOPC__V_CMPX_GE_U64 - - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } - - Inst_VOPC__V_CMPX_T_U64::Inst_VOPC__V_CMPX_T_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_t_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMPX_T_U64 - - Inst_VOPC__V_CMPX_T_U64::~Inst_VOPC__V_CMPX_T_U64() - { - } // ~Inst_VOPC__V_CMPX_T_U64 - - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
    void
    Inst_VOPC__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        // Always-true compare: set the VCC bit for every active lane.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        // CMPX variant: the compare result also replaces EXEC.
        wf->execMask() = vcc.rawData();
        vcc.write();
    }

    Inst_VINTRP__V_INTERP_P1_F32::Inst_VINTRP__V_INTERP_P1_F32(
          InFmt_VINTRP *iFmt)
        : Inst_VINTRP(iFmt, "v_interp_p1_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VINTRP__V_INTERP_P1_F32

    Inst_VINTRP__V_INTERP_P1_F32::~Inst_VINTRP__V_INTERP_P1_F32()
    {
    } // ~Inst_VINTRP__V_INTERP_P1_F32

    // D.f = P10 * S.f + P0; parameter interpolation
    // Not implemented in the model: executing it panics the simulation.
    void
    Inst_VINTRP__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VINTRP__V_INTERP_P2_F32::Inst_VINTRP__V_INTERP_P2_F32(
          InFmt_VINTRP *iFmt)
        : Inst_VINTRP(iFmt, "v_interp_p2_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VINTRP__V_INTERP_P2_F32

    Inst_VINTRP__V_INTERP_P2_F32::~Inst_VINTRP__V_INTERP_P2_F32()
    {
    } // ~Inst_VINTRP__V_INTERP_P2_F32

    // D.f = P20 * S.f + D.f; parameter interpolation
    // Not implemented in the model: executing it panics the simulation.
    void
    Inst_VINTRP__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VINTRP__V_INTERP_MOV_F32::Inst_VINTRP__V_INTERP_MOV_F32(
          InFmt_VINTRP *iFmt)
        : Inst_VINTRP(iFmt, "v_interp_mov_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VINTRP__V_INTERP_MOV_F32

    Inst_VINTRP__V_INTERP_MOV_F32::~Inst_VINTRP__V_INTERP_MOV_F32()
    {
    } // ~Inst_VINTRP__V_INTERP_MOV_F32

    // Not implemented in the model: executing it panics the simulation.
    void
    Inst_VINTRP__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMP_CLASS_F32::Inst_VOP3__V_CMP_CLASS_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_class_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_CLASS_F32

    Inst_VOP3__V_CMP_CLASS_F32::~Inst_VOP3__V_CMP_CLASS_F32()
    {
    } // ~Inst_VOP3__V_CMP_CLASS_F32

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.f
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        // For each active lane, test the classes requested by src1's bit
        // mask in ascending bit order; the first match sets the lane's
        // result bit and moves on to the next lane.
        // NOTE(review): lanes that match no class (and inactive lanes) are
        // never explicitly cleared -- this appears to rely on the
        // destination operand starting out zeroed; confirm.
        // Note: std::isnan cannot distinguish signaling from quiet NaN, so
        // bits 0 and 1 are handled together.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        sdst.write();
    }

    Inst_VOP3__V_CMPX_CLASS_F32::Inst_VOP3__V_CMPX_CLASS_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_class_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_CLASS_F32

    Inst_VOP3__V_CMPX_CLASS_F32::~Inst_VOP3__V_CMPX_CLASS_F32()
    {
    } // ~Inst_VOP3__V_CMPX_CLASS_F32

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.f
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        // For each active lane, test the classes requested by src1's bit
        // mask in ascending bit order; the first match sets the lane's
        // result bit. Non-matching/inactive lanes are never explicitly
        // cleared -- NOTE(review): appears to rely on the destination
        // operand starting out zeroed; confirm.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN (signaling or quiet; std::isnan cannot
                    // distinguish the two, so bits 0 and 1 share a test)
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        // CMPX variant: the classify result also replaces EXEC.
        wf->execMask() = sdst.rawData();
        sdst.write();
    }

    Inst_VOP3__V_CMP_CLASS_F64::Inst_VOP3__V_CMP_CLASS_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_class_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_CLASS_F64

    Inst_VOP3__V_CMP_CLASS_F64::~Inst_VOP3__V_CMP_CLASS_F64()
    {
    } // ~Inst_VOP3__V_CMP_CLASS_F64

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.d
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        // Same classify chain as the F32 variant, operating on doubles.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        sdst.write();
    }

    Inst_VOP3__V_CMPX_CLASS_F64::Inst_VOP3__V_CMPX_CLASS_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_class_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_CLASS_F64

    Inst_VOP3__V_CMPX_CLASS_F64::~Inst_VOP3__V_CMPX_CLASS_F64()
    {
    } // ~Inst_VOP3__V_CMPX_CLASS_F64

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.d
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        // Classify each active lane against the class mask in src1 (bits
        // tested in ascending order; first match wins). Non-matching and
        // inactive lanes are never explicitly cleared -- NOTE(review):
        // appears to rely on the destination operand starting zeroed;
        // confirm.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN (signaling or quiet; std::isnan cannot tell
                    // the two apart, so bits 0 and 1 share a test)
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        // CMPX variant: the classify result also replaces EXEC.
        wf->execMask() = sdst.rawData();
        sdst.write();
    }

    Inst_VOP3__V_CMP_CLASS_F16::Inst_VOP3__V_CMP_CLASS_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_class_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_CLASS_F16

    Inst_VOP3__V_CMP_CLASS_F16::~Inst_VOP3__V_CMP_CLASS_F16()
    {
    } // ~Inst_VOP3__V_CMP_CLASS_F16

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    // Not implemented in the model: executing it panics the simulation.
    void
    Inst_VOP3__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMPX_CLASS_F16::Inst_VOP3__V_CMPX_CLASS_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_class_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_CLASS_F16

    Inst_VOP3__V_CMPX_CLASS_F16::~Inst_VOP3__V_CMPX_CLASS_F16()
    {
    } // ~Inst_VOP3__V_CMPX_CLASS_F16

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.f16
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    // Not implemented in the model: executing it panics the simulation.
    void
    Inst_VOP3__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMP_F_F16::Inst_VOP3__V_CMP_F_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_F_F16

    Inst_VOP3__V_CMP_F_F16::~Inst_VOP3__V_CMP_F_F16()
    {
    } // ~Inst_VOP3__V_CMP_F_F16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    // Not implemented in the model: executing it panics the simulation.
    void
    Inst_VOP3__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMP_LT_F16::Inst_VOP3__V_CMP_LT_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_LT_F16

    Inst_VOP3__V_CMP_LT_F16::~Inst_VOP3__V_CMP_LT_F16()
    {
    } // ~Inst_VOP3__V_CMP_LT_F16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    // Not implemented in the model: executing it panics the simulation.
    void
    Inst_VOP3__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMP_EQ_F16::Inst_VOP3__V_CMP_EQ_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_EQ_F16

    Inst_VOP3__V_CMP_EQ_F16::~Inst_VOP3__V_CMP_EQ_F16()
    {
    } // ~Inst_VOP3__V_CMP_EQ_F16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    // Not implemented in the model: executing it panics the simulation.
    void
    Inst_VOP3__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMP_LE_F16::Inst_VOP3__V_CMP_LE_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_LE_F16

    Inst_VOP3__V_CMP_LE_F16::~Inst_VOP3__V_CMP_LE_F16()
    {
    } // ~Inst_VOP3__V_CMP_LE_F16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    // Not implemented in the model: executing it panics the simulation.
    void
    Inst_VOP3__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMP_GT_F16::Inst_VOP3__V_CMP_GT_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_GT_F16

    Inst_VOP3__V_CMP_GT_F16::~Inst_VOP3__V_CMP_GT_F16()
    {
    } // ~Inst_VOP3__V_CMP_GT_F16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    // Not implemented in the model: executing it panics the simulation.
    void
    Inst_VOP3__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMP_LG_F16::Inst_VOP3__V_CMP_LG_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_LG_F16

    Inst_VOP3__V_CMP_LG_F16::~Inst_VOP3__V_CMP_LG_F16()
    {
    } // ~Inst_VOP3__V_CMP_LG_F16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    // Not implemented in the model: executing it panics the simulation.
    void
    Inst_VOP3__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMP_GE_F16::Inst_VOP3__V_CMP_GE_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_GE_F16

    Inst_VOP3__V_CMP_GE_F16::~Inst_VOP3__V_CMP_GE_F16()
    {
    } // ~Inst_VOP3__V_CMP_GE_F16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    // Not implemented in the model: executing it panics the simulation.
    void
    Inst_VOP3__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMP_O_F16::Inst_VOP3__V_CMP_O_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_o_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_O_F16

    Inst_VOP3__V_CMP_O_F16::~Inst_VOP3__V_CMP_O_F16()
    {
    } // ~Inst_VOP3__V_CMP_O_F16

    // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
- void - Inst_VOP3__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP3__V_CMP_U_F16::Inst_VOP3__V_CMP_U_F16(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_u_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_U_F16 - - Inst_VOP3__V_CMP_U_F16::~Inst_VOP3__V_CMP_U_F16() - { - } // ~Inst_VOP3__V_CMP_U_F16 - - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP3__V_CMP_NGE_F16::Inst_VOP3__V_CMP_NGE_F16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_nge_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NGE_F16 - - Inst_VOP3__V_CMP_NGE_F16::~Inst_VOP3__V_CMP_NGE_F16() - { - } // ~Inst_VOP3__V_CMP_NGE_F16 - - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP3__V_CMP_NLG_F16::Inst_VOP3__V_CMP_NLG_F16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_nlg_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NLG_F16 - - Inst_VOP3__V_CMP_NLG_F16::~Inst_VOP3__V_CMP_NLG_F16() - { - } // ~Inst_VOP3__V_CMP_NLG_F16 - - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP3__V_CMP_NGT_F16::Inst_VOP3__V_CMP_NGT_F16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_ngt_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NGT_F16 - - Inst_VOP3__V_CMP_NGT_F16::~Inst_VOP3__V_CMP_NGT_F16() - { - } // ~Inst_VOP3__V_CMP_NGT_F16 - - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP3__V_CMP_NLE_F16::Inst_VOP3__V_CMP_NLE_F16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_nle_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NLE_F16 - - Inst_VOP3__V_CMP_NLE_F16::~Inst_VOP3__V_CMP_NLE_F16() - { - } // ~Inst_VOP3__V_CMP_NLE_F16 - - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP3__V_CMP_NEQ_F16::Inst_VOP3__V_CMP_NEQ_F16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_neq_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NEQ_F16 - - Inst_VOP3__V_CMP_NEQ_F16::~Inst_VOP3__V_CMP_NEQ_F16() - { - } // ~Inst_VOP3__V_CMP_NEQ_F16 - - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP3__V_CMP_NLT_F16::Inst_VOP3__V_CMP_NLT_F16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_nlt_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NLT_F16 - - Inst_VOP3__V_CMP_NLT_F16::~Inst_VOP3__V_CMP_NLT_F16() - { - } // ~Inst_VOP3__V_CMP_NLT_F16 - - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP3__V_CMP_TRU_F16::Inst_VOP3__V_CMP_TRU_F16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_tru_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_TRU_F16 - - Inst_VOP3__V_CMP_TRU_F16::~Inst_VOP3__V_CMP_TRU_F16() - { - } // ~Inst_VOP3__V_CMP_TRU_F16 - - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMPX_F_F16::Inst_VOP3__V_CMPX_F_F16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_f_f16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_F_F16 - - Inst_VOP3__V_CMPX_F_F16::~Inst_VOP3__V_CMPX_F_F16() - { - } // ~Inst_VOP3__V_CMPX_F_F16 - - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LT_F16::Inst_VOP3__V_CMPX_LT_F16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_lt_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMPX_LT_F16 - - Inst_VOP3__V_CMPX_LT_F16::~Inst_VOP3__V_CMPX_LT_F16() - { - } // ~Inst_VOP3__V_CMPX_LT_F16 - - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP3__V_CMPX_EQ_F16::Inst_VOP3__V_CMPX_EQ_F16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_eq_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMPX_EQ_F16 - - Inst_VOP3__V_CMPX_EQ_F16::~Inst_VOP3__V_CMPX_EQ_F16() - { - } // ~Inst_VOP3__V_CMPX_EQ_F16 - - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
    // ------------------------------------------------------------------
    // VOP3-encoded 16-bit CMPX compares (v_cmpx_*_f16).  In addition to
    // writing the destination, CMPX ops also write the EXEC mask; none of
    // the ones below are implemented in this model — executing one raises
    // a fatal panic via panicUnimplemented().
    // ------------------------------------------------------------------
    void
    Inst_VOP3__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMPX_LE_F16::Inst_VOP3__V_CMPX_LE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_LE_F16

    Inst_VOP3__V_CMPX_LE_F16::~Inst_VOP3__V_CMPX_LE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_LE_F16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMPX_GT_F16::Inst_VOP3__V_CMPX_GT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_GT_F16

    Inst_VOP3__V_CMPX_GT_F16::~Inst_VOP3__V_CMPX_GT_F16()
    {
    } // ~Inst_VOP3__V_CMPX_GT_F16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMPX_LG_F16::Inst_VOP3__V_CMPX_LG_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_LG_F16

    Inst_VOP3__V_CMPX_LG_F16::~Inst_VOP3__V_CMPX_LG_F16()
    {
    } // ~Inst_VOP3__V_CMPX_LG_F16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMPX_GE_F16::Inst_VOP3__V_CMPX_GE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_GE_F16

    Inst_VOP3__V_CMPX_GE_F16::~Inst_VOP3__V_CMPX_GE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_GE_F16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMPX_O_F16::Inst_VOP3__V_CMPX_O_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_o_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_O_F16

    Inst_VOP3__V_CMPX_O_F16::~Inst_VOP3__V_CMPX_O_F16()
    {
    } // ~Inst_VOP3__V_CMPX_O_F16

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMPX_U_F16::Inst_VOP3__V_CMPX_U_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_u_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_U_F16

    Inst_VOP3__V_CMPX_U_F16::~Inst_VOP3__V_CMPX_U_F16()
    {
    } // ~Inst_VOP3__V_CMPX_U_F16

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMPX_NGE_F16::Inst_VOP3__V_CMPX_NGE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NGE_F16

    Inst_VOP3__V_CMPX_NGE_F16::~Inst_VOP3__V_CMPX_NGE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NGE_F16

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMPX_NLG_F16::Inst_VOP3__V_CMPX_NLG_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NLG_F16

    Inst_VOP3__V_CMPX_NLG_F16::~Inst_VOP3__V_CMPX_NLG_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NLG_F16

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMPX_NGT_F16::Inst_VOP3__V_CMPX_NGT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ngt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NGT_F16

    Inst_VOP3__V_CMPX_NGT_F16::~Inst_VOP3__V_CMPX_NGT_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NGT_F16

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMPX_NLE_F16::Inst_VOP3__V_CMPX_NLE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nle_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NLE_F16

    Inst_VOP3__V_CMPX_NLE_F16::~Inst_VOP3__V_CMPX_NLE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NLE_F16

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMPX_NEQ_F16::Inst_VOP3__V_CMPX_NEQ_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_neq_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NEQ_F16

    Inst_VOP3__V_CMPX_NEQ_F16::~Inst_VOP3__V_CMPX_NEQ_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NEQ_F16

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CMPX_NLT_F16::Inst_VOP3__V_CMPX_NLT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NLT_F16

    Inst_VOP3__V_CMPX_NLT_F16::~Inst_VOP3__V_CMPX_NLT_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NLT_F16

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
- void - Inst_VOP3__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP3__V_CMPX_TRU_F16::Inst_VOP3__V_CMPX_TRU_F16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_tru_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMPX_TRU_F16 - - Inst_VOP3__V_CMPX_TRU_F16::~Inst_VOP3__V_CMPX_TRU_F16() - { - } // ~Inst_VOP3__V_CMPX_TRU_F16 - - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMP_F_F32::Inst_VOP3__V_CMP_F_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_f_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_F_F32 - - Inst_VOP3__V_CMP_F_F32::~Inst_VOP3__V_CMP_F_F32() - { - } // ~Inst_VOP3__V_CMP_F_F32 - - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_LT_F32::Inst_VOP3__V_CMP_LT_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_lt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_LT_F32 - - Inst_VOP3__V_CMP_LT_F32::~Inst_VOP3__V_CMP_LT_F32() - { - } // ~Inst_VOP3__V_CMP_LT_F32 - - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_EQ_F32::Inst_VOP3__V_CMP_EQ_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_eq_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_EQ_F32 - - Inst_VOP3__V_CMP_EQ_F32::~Inst_VOP3__V_CMP_EQ_F32() - { - } // ~Inst_VOP3__V_CMP_EQ_F32 - - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_LE_F32::Inst_VOP3__V_CMP_LE_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_le_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_LE_F32 - - Inst_VOP3__V_CMP_LE_F32::~Inst_VOP3__V_CMP_LE_F32() - { - } // ~Inst_VOP3__V_CMP_LE_F32 - - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_GT_F32::Inst_VOP3__V_CMP_GT_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_gt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_GT_F32 - - Inst_VOP3__V_CMP_GT_F32::~Inst_VOP3__V_CMP_GT_F32() - { - } // ~Inst_VOP3__V_CMP_GT_F32 - - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_LG_F32::Inst_VOP3__V_CMP_LG_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_lg_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_LG_F32 - - Inst_VOP3__V_CMP_LG_F32::~Inst_VOP3__V_CMP_LG_F32() - { - } // ~Inst_VOP3__V_CMP_LG_F32 - - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_GE_F32::Inst_VOP3__V_CMP_GE_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_ge_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_GE_F32 - - Inst_VOP3__V_CMP_GE_F32::~Inst_VOP3__V_CMP_GE_F32() - { - } // ~Inst_VOP3__V_CMP_GE_F32 - - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_O_F32::Inst_VOP3__V_CMP_O_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_o_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_O_F32 - - Inst_VOP3__V_CMP_O_F32::~Inst_VOP3__V_CMP_O_F32() - { - } // ~Inst_VOP3__V_CMP_O_F32 - - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_U_F32::Inst_VOP3__V_CMP_U_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_u_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_U_F32 - - Inst_VOP3__V_CMP_U_F32::~Inst_VOP3__V_CMP_U_F32() - { - } // ~Inst_VOP3__V_CMP_U_F32 - - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NGE_F32::Inst_VOP3__V_CMP_NGE_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_nge_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NGE_F32 - - Inst_VOP3__V_CMP_NGE_F32::~Inst_VOP3__V_CMP_NGE_F32() - { - } // ~Inst_VOP3__V_CMP_NGE_F32 - - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NLG_F32::Inst_VOP3__V_CMP_NLG_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_nlg_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NLG_F32 - - Inst_VOP3__V_CMP_NLG_F32::~Inst_VOP3__V_CMP_NLG_F32() - { - } // ~Inst_VOP3__V_CMP_NLG_F32 - - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NGT_F32::Inst_VOP3__V_CMP_NGT_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_ngt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NGT_F32 - - Inst_VOP3__V_CMP_NGT_F32::~Inst_VOP3__V_CMP_NGT_F32() - { - } // ~Inst_VOP3__V_CMP_NGT_F32 - - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NLE_F32::Inst_VOP3__V_CMP_NLE_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_nle_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NLE_F32 - - Inst_VOP3__V_CMP_NLE_F32::~Inst_VOP3__V_CMP_NLE_F32() - { - } // ~Inst_VOP3__V_CMP_NLE_F32 - - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NEQ_F32::Inst_VOP3__V_CMP_NEQ_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_neq_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NEQ_F32 - - Inst_VOP3__V_CMP_NEQ_F32::~Inst_VOP3__V_CMP_NEQ_F32() - { - } // ~Inst_VOP3__V_CMP_NEQ_F32 - - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NLT_F32::Inst_VOP3__V_CMP_NLT_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_nlt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NLT_F32 - - Inst_VOP3__V_CMP_NLT_F32::~Inst_VOP3__V_CMP_NLT_F32() - { - } // ~Inst_VOP3__V_CMP_NLT_F32 - - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_TRU_F32::Inst_VOP3__V_CMP_TRU_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_tru_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_TRU_F32 - - Inst_VOP3__V_CMP_TRU_F32::~Inst_VOP3__V_CMP_TRU_F32() - { - } // ~Inst_VOP3__V_CMP_TRU_F32 - - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMPX_F_F32::Inst_VOP3__V_CMPX_F_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_f_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_F_F32 - - Inst_VOP3__V_CMPX_F_F32::~Inst_VOP3__V_CMPX_F_F32() - { - } // ~Inst_VOP3__V_CMPX_F_F32 - - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LT_F32::Inst_VOP3__V_CMPX_LT_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_lt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_LT_F32 - - Inst_VOP3__V_CMPX_LT_F32::~Inst_VOP3__V_CMPX_LT_F32() - { - } // ~Inst_VOP3__V_CMPX_LT_F32 - - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_EQ_F32::Inst_VOP3__V_CMPX_EQ_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_eq_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_EQ_F32 - - Inst_VOP3__V_CMPX_EQ_F32::~Inst_VOP3__V_CMPX_EQ_F32() - { - } // ~Inst_VOP3__V_CMPX_EQ_F32 - - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LE_F32::Inst_VOP3__V_CMPX_LE_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_le_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_LE_F32 - - Inst_VOP3__V_CMPX_LE_F32::~Inst_VOP3__V_CMPX_LE_F32() - { - } // ~Inst_VOP3__V_CMPX_LE_F32 - - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_GT_F32::Inst_VOP3__V_CMPX_GT_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_gt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_GT_F32 - - Inst_VOP3__V_CMPX_GT_F32::~Inst_VOP3__V_CMPX_GT_F32() - { - } // ~Inst_VOP3__V_CMPX_GT_F32 - - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LG_F32::Inst_VOP3__V_CMPX_LG_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_lg_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_LG_F32 - - Inst_VOP3__V_CMPX_LG_F32::~Inst_VOP3__V_CMPX_LG_F32() - { - } // ~Inst_VOP3__V_CMPX_LG_F32 - - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_GE_F32::Inst_VOP3__V_CMPX_GE_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_ge_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_GE_F32 - - Inst_VOP3__V_CMPX_GE_F32::~Inst_VOP3__V_CMPX_GE_F32() - { - } // ~Inst_VOP3__V_CMPX_GE_F32 - - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_O_F32::Inst_VOP3__V_CMPX_O_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_o_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_O_F32 - - Inst_VOP3__V_CMPX_O_F32::~Inst_VOP3__V_CMPX_O_F32() - { - } // ~Inst_VOP3__V_CMPX_O_F32 - - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOP3__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_U_F32::Inst_VOP3__V_CMPX_U_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_u_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_U_F32 - - Inst_VOP3__V_CMPX_U_F32::~Inst_VOP3__V_CMPX_U_F32() - { - } // ~Inst_VOP3__V_CMPX_U_F32 - - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOP3__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NGE_F32::Inst_VOP3__V_CMPX_NGE_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_nge_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_NGE_F32 - - Inst_VOP3__V_CMPX_NGE_F32::~Inst_VOP3__V_CMPX_NGE_F32() - { - } // ~Inst_VOP3__V_CMPX_NGE_F32 - - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NLG_F32::Inst_VOP3__V_CMPX_NLG_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_nlg_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_NLG_F32 - - Inst_VOP3__V_CMPX_NLG_F32::~Inst_VOP3__V_CMPX_NLG_F32() - { - } // ~Inst_VOP3__V_CMPX_NLG_F32 - - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NGT_F32::Inst_VOP3__V_CMPX_NGT_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_ngt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_NGT_F32 - - Inst_VOP3__V_CMPX_NGT_F32::~Inst_VOP3__V_CMPX_NGT_F32() - { - } // ~Inst_VOP3__V_CMPX_NGT_F32 - - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NLE_F32::Inst_VOP3__V_CMPX_NLE_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_nle_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_NLE_F32 - - Inst_VOP3__V_CMPX_NLE_F32::~Inst_VOP3__V_CMPX_NLE_F32() - { - } // ~Inst_VOP3__V_CMPX_NLE_F32 - - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NEQ_F32::Inst_VOP3__V_CMPX_NEQ_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_neq_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_NEQ_F32 - - Inst_VOP3__V_CMPX_NEQ_F32::~Inst_VOP3__V_CMPX_NEQ_F32() - { - } // ~Inst_VOP3__V_CMPX_NEQ_F32 - - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NLT_F32::Inst_VOP3__V_CMPX_NLT_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_nlt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_NLT_F32 - - Inst_VOP3__V_CMPX_NLT_F32::~Inst_VOP3__V_CMPX_NLT_F32() - { - } // ~Inst_VOP3__V_CMPX_NLT_F32 - - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_TRU_F32::Inst_VOP3__V_CMPX_TRU_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_tru_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMPX_TRU_F32 - - Inst_VOP3__V_CMPX_TRU_F32::~Inst_VOP3__V_CMPX_TRU_F32() - { - } // ~Inst_VOP3__V_CMPX_TRU_F32 - - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMP_F_F64::Inst_VOP3__V_CMP_F_F64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_f_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_F_F64 - - Inst_VOP3__V_CMP_F_F64::~Inst_VOP3__V_CMP_F_F64() - { - } // ~Inst_VOP3__V_CMP_F_F64 - - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_LT_F64::Inst_VOP3__V_CMP_LT_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_lt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_LT_F64 - - Inst_VOP3__V_CMP_LT_F64::~Inst_VOP3__V_CMP_LT_F64() - { - } // ~Inst_VOP3__V_CMP_LT_F64 - - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_EQ_F64::Inst_VOP3__V_CMP_EQ_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_eq_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_EQ_F64 - - Inst_VOP3__V_CMP_EQ_F64::~Inst_VOP3__V_CMP_EQ_F64() - { - } // ~Inst_VOP3__V_CMP_EQ_F64 - - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_LE_F64::Inst_VOP3__V_CMP_LE_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_le_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_LE_F64 - - Inst_VOP3__V_CMP_LE_F64::~Inst_VOP3__V_CMP_LE_F64() - { - } // ~Inst_VOP3__V_CMP_LE_F64 - - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_GT_F64::Inst_VOP3__V_CMP_GT_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_gt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_GT_F64 - - Inst_VOP3__V_CMP_GT_F64::~Inst_VOP3__V_CMP_GT_F64() - { - } // ~Inst_VOP3__V_CMP_GT_F64 - - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_LG_F64::Inst_VOP3__V_CMP_LG_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_lg_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_LG_F64 - - Inst_VOP3__V_CMP_LG_F64::~Inst_VOP3__V_CMP_LG_F64() - { - } // ~Inst_VOP3__V_CMP_LG_F64 - - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_GE_F64::Inst_VOP3__V_CMP_GE_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_ge_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_GE_F64 - - Inst_VOP3__V_CMP_GE_F64::~Inst_VOP3__V_CMP_GE_F64() - { - } // ~Inst_VOP3__V_CMP_GE_F64 - - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_O_F64::Inst_VOP3__V_CMP_O_F64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_o_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_O_F64 - - Inst_VOP3__V_CMP_O_F64::~Inst_VOP3__V_CMP_O_F64() - { - } // ~Inst_VOP3__V_CMP_O_F64 - - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_U_F64::Inst_VOP3__V_CMP_U_F64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_u_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_U_F64 - - Inst_VOP3__V_CMP_U_F64::~Inst_VOP3__V_CMP_U_F64() - { - } // ~Inst_VOP3__V_CMP_U_F64 - - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NGE_F64::Inst_VOP3__V_CMP_NGE_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_nge_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NGE_F64 - - Inst_VOP3__V_CMP_NGE_F64::~Inst_VOP3__V_CMP_NGE_F64() - { - } // ~Inst_VOP3__V_CMP_NGE_F64 - - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NLG_F64::Inst_VOP3__V_CMP_NLG_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_nlg_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NLG_F64 - - Inst_VOP3__V_CMP_NLG_F64::~Inst_VOP3__V_CMP_NLG_F64() - { - } // ~Inst_VOP3__V_CMP_NLG_F64 - - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NGT_F64::Inst_VOP3__V_CMP_NGT_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_ngt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NGT_F64 - - Inst_VOP3__V_CMP_NGT_F64::~Inst_VOP3__V_CMP_NGT_F64() - { - } // ~Inst_VOP3__V_CMP_NGT_F64 - - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NLE_F64::Inst_VOP3__V_CMP_NLE_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_nle_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NLE_F64 - - Inst_VOP3__V_CMP_NLE_F64::~Inst_VOP3__V_CMP_NLE_F64() - { - } // ~Inst_VOP3__V_CMP_NLE_F64 - - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 
1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NEQ_F64::Inst_VOP3__V_CMP_NEQ_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_neq_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NEQ_F64 - - Inst_VOP3__V_CMP_NEQ_F64::~Inst_VOP3__V_CMP_NEQ_F64() - { - } // ~Inst_VOP3__V_CMP_NEQ_F64 - - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NLT_F64::Inst_VOP3__V_CMP_NLT_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_nlt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NLT_F64 - - Inst_VOP3__V_CMP_NLT_F64::~Inst_VOP3__V_CMP_NLT_F64() - { - } // ~Inst_VOP3__V_CMP_NLT_F64 - - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_TRU_F64::Inst_VOP3__V_CMP_TRU_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_tru_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_TRU_F64 - - Inst_VOP3__V_CMP_TRU_F64::~Inst_VOP3__V_CMP_TRU_F64() - { - } // ~Inst_VOP3__V_CMP_TRU_F64 - - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMPX_F_F64::Inst_VOP3__V_CMPX_F_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_f_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_F_F64 - - Inst_VOP3__V_CMPX_F_F64::~Inst_VOP3__V_CMPX_F_F64() - { - } // ~Inst_VOP3__V_CMPX_F_F64 - - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LT_F64::Inst_VOP3__V_CMPX_LT_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_lt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_LT_F64 - - Inst_VOP3__V_CMPX_LT_F64::~Inst_VOP3__V_CMPX_LT_F64() - { - } // ~Inst_VOP3__V_CMPX_LT_F64 - - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_EQ_F64::Inst_VOP3__V_CMPX_EQ_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_eq_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_EQ_F64 - - Inst_VOP3__V_CMPX_EQ_F64::~Inst_VOP3__V_CMPX_EQ_F64() - { - } // ~Inst_VOP3__V_CMPX_EQ_F64 - - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LE_F64::Inst_VOP3__V_CMPX_LE_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_le_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_LE_F64 - - Inst_VOP3__V_CMPX_LE_F64::~Inst_VOP3__V_CMPX_LE_F64() - { - } // ~Inst_VOP3__V_CMPX_LE_F64 - - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_GT_F64::Inst_VOP3__V_CMPX_GT_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_gt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_GT_F64 - - Inst_VOP3__V_CMPX_GT_F64::~Inst_VOP3__V_CMPX_GT_F64() - { - } // ~Inst_VOP3__V_CMPX_GT_F64 - - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LG_F64::Inst_VOP3__V_CMPX_LG_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_lg_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_LG_F64 - - Inst_VOP3__V_CMPX_LG_F64::~Inst_VOP3__V_CMPX_LG_F64() - { - } // ~Inst_VOP3__V_CMPX_LG_F64 - - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_GE_F64::Inst_VOP3__V_CMPX_GE_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_ge_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_GE_F64 - - Inst_VOP3__V_CMPX_GE_F64::~Inst_VOP3__V_CMPX_GE_F64() - { - } // ~Inst_VOP3__V_CMPX_GE_F64 - - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_O_F64::Inst_VOP3__V_CMPX_O_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_o_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_O_F64 - - Inst_VOP3__V_CMPX_O_F64::~Inst_VOP3__V_CMPX_O_F64() - { - } // ~Inst_VOP3__V_CMPX_O_F64 - - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. 
- void - Inst_VOP3__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_U_F64::Inst_VOP3__V_CMPX_U_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_u_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_U_F64 - - Inst_VOP3__V_CMPX_U_F64::~Inst_VOP3__V_CMPX_U_F64() - { - } // ~Inst_VOP3__V_CMPX_U_F64 - - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. 
- void - Inst_VOP3__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NGE_F64::Inst_VOP3__V_CMPX_NGE_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_nge_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_NGE_F64 - - Inst_VOP3__V_CMPX_NGE_F64::~Inst_VOP3__V_CMPX_NGE_F64() - { - } // ~Inst_VOP3__V_CMPX_NGE_F64 - - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NLG_F64::Inst_VOP3__V_CMPX_NLG_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_nlg_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_NLG_F64 - - Inst_VOP3__V_CMPX_NLG_F64::~Inst_VOP3__V_CMPX_NLG_F64() - { - } // ~Inst_VOP3__V_CMPX_NLG_F64 - - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NGT_F64::Inst_VOP3__V_CMPX_NGT_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_ngt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_NGT_F64 - - Inst_VOP3__V_CMPX_NGT_F64::~Inst_VOP3__V_CMPX_NGT_F64() - { - } // ~Inst_VOP3__V_CMPX_NGT_F64 - - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NLE_F64::Inst_VOP3__V_CMPX_NLE_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_nle_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_NLE_F64 - - Inst_VOP3__V_CMPX_NLE_F64::~Inst_VOP3__V_CMPX_NLE_F64() - { - } // ~Inst_VOP3__V_CMPX_NLE_F64 - - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NEQ_F64::Inst_VOP3__V_CMPX_NEQ_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_neq_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_NEQ_F64 - - Inst_VOP3__V_CMPX_NEQ_F64::~Inst_VOP3__V_CMPX_NEQ_F64() - { - } // ~Inst_VOP3__V_CMPX_NEQ_F64 - - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NLT_F64::Inst_VOP3__V_CMPX_NLT_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_nlt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_NLT_F64 - - Inst_VOP3__V_CMPX_NLT_F64::~Inst_VOP3__V_CMPX_NLT_F64() - { - } // ~Inst_VOP3__V_CMPX_NLT_F64 - - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_TRU_F64::Inst_VOP3__V_CMPX_TRU_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_tru_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMPX_TRU_F64 - - Inst_VOP3__V_CMPX_TRU_F64::~Inst_VOP3__V_CMPX_TRU_F64() - { - } // ~Inst_VOP3__V_CMPX_TRU_F64 - - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMP_F_I16::Inst_VOP3__V_CMP_F_I16(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_f_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_I16 - - Inst_VOP3__V_CMP_F_I16::~Inst_VOP3__V_CMP_F_I16() - { - } // ~Inst_VOP3__V_CMP_F_I16 - - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_LT_I16::Inst_VOP3__V_CMP_LT_I16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_lt_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_I16 - - Inst_VOP3__V_CMP_LT_I16::~Inst_VOP3__V_CMP_LT_I16() - { - } // ~Inst_VOP3__V_CMP_LT_I16 - - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_EQ_I16::Inst_VOP3__V_CMP_EQ_I16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_eq_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_I16 - - Inst_VOP3__V_CMP_EQ_I16::~Inst_VOP3__V_CMP_EQ_I16() - { - } // ~Inst_VOP3__V_CMP_EQ_I16 - - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_LE_I16::Inst_VOP3__V_CMP_LE_I16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_le_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_I16 - - Inst_VOP3__V_CMP_LE_I16::~Inst_VOP3__V_CMP_LE_I16() - { - } // ~Inst_VOP3__V_CMP_LE_I16 - - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_GT_I16::Inst_VOP3__V_CMP_GT_I16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_gt_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_I16 - - Inst_VOP3__V_CMP_GT_I16::~Inst_VOP3__V_CMP_GT_I16() - { - } // ~Inst_VOP3__V_CMP_GT_I16 - - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NE_I16::Inst_VOP3__V_CMP_NE_I16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_ne_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_I16 - - Inst_VOP3__V_CMP_NE_I16::~Inst_VOP3__V_CMP_NE_I16() - { - } // ~Inst_VOP3__V_CMP_NE_I16 - - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_GE_I16::Inst_VOP3__V_CMP_GE_I16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_ge_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_I16 - - Inst_VOP3__V_CMP_GE_I16::~Inst_VOP3__V_CMP_GE_I16() - { - } // ~Inst_VOP3__V_CMP_GE_I16 - - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_T_I16::Inst_VOP3__V_CMP_T_I16(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_t_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_I16 - - Inst_VOP3__V_CMP_T_I16::~Inst_VOP3__V_CMP_T_I16() - { - } // ~Inst_VOP3__V_CMP_T_I16 - - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_F_U16::Inst_VOP3__V_CMP_F_U16(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_f_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_U16 - - Inst_VOP3__V_CMP_F_U16::~Inst_VOP3__V_CMP_F_U16() - { - } // ~Inst_VOP3__V_CMP_F_U16 - - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_LT_U16::Inst_VOP3__V_CMP_LT_U16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_lt_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_U16 - - Inst_VOP3__V_CMP_LT_U16::~Inst_VOP3__V_CMP_LT_U16() - { - } // ~Inst_VOP3__V_CMP_LT_U16 - - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_EQ_U16::Inst_VOP3__V_CMP_EQ_U16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_eq_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_U16 - - Inst_VOP3__V_CMP_EQ_U16::~Inst_VOP3__V_CMP_EQ_U16() - { - } // ~Inst_VOP3__V_CMP_EQ_U16 - - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_LE_U16::Inst_VOP3__V_CMP_LE_U16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_le_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_U16 - - Inst_VOP3__V_CMP_LE_U16::~Inst_VOP3__V_CMP_LE_U16() - { - } // ~Inst_VOP3__V_CMP_LE_U16 - - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_GT_U16::Inst_VOP3__V_CMP_GT_U16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_gt_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_U16 - - Inst_VOP3__V_CMP_GT_U16::~Inst_VOP3__V_CMP_GT_U16() - { - } // ~Inst_VOP3__V_CMP_GT_U16 - - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NE_U16::Inst_VOP3__V_CMP_NE_U16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_ne_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_U16 - - Inst_VOP3__V_CMP_NE_U16::~Inst_VOP3__V_CMP_NE_U16() - { - } // ~Inst_VOP3__V_CMP_NE_U16 - - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_GE_U16::Inst_VOP3__V_CMP_GE_U16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_ge_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_U16 - - Inst_VOP3__V_CMP_GE_U16::~Inst_VOP3__V_CMP_GE_U16() - { - } // ~Inst_VOP3__V_CMP_GE_U16 - - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_T_U16::Inst_VOP3__V_CMP_T_U16(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_t_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_U16 - - Inst_VOP3__V_CMP_T_U16::~Inst_VOP3__V_CMP_T_U16() - { - } // ~Inst_VOP3__V_CMP_T_U16 - - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMPX_F_I16::Inst_VOP3__V_CMPX_F_I16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_f_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_F_I16 - - Inst_VOP3__V_CMPX_F_I16::~Inst_VOP3__V_CMPX_F_I16() - { - } // ~Inst_VOP3__V_CMPX_F_I16 - - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LT_I16::Inst_VOP3__V_CMPX_LT_I16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_lt_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_LT_I16 - - Inst_VOP3__V_CMPX_LT_I16::~Inst_VOP3__V_CMPX_LT_I16() - { - } // ~Inst_VOP3__V_CMPX_LT_I16 - - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_EQ_I16::Inst_VOP3__V_CMPX_EQ_I16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_eq_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_EQ_I16 - - Inst_VOP3__V_CMPX_EQ_I16::~Inst_VOP3__V_CMPX_EQ_I16() - { - } // ~Inst_VOP3__V_CMPX_EQ_I16 - - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LE_I16::Inst_VOP3__V_CMPX_LE_I16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_le_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_LE_I16 - - Inst_VOP3__V_CMPX_LE_I16::~Inst_VOP3__V_CMPX_LE_I16() - { - } // ~Inst_VOP3__V_CMPX_LE_I16 - - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_GT_I16::Inst_VOP3__V_CMPX_GT_I16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_gt_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_GT_I16 - - Inst_VOP3__V_CMPX_GT_I16::~Inst_VOP3__V_CMPX_GT_I16() - { - } // ~Inst_VOP3__V_CMPX_GT_I16 - - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NE_I16::Inst_VOP3__V_CMPX_NE_I16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_ne_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_NE_I16 - - Inst_VOP3__V_CMPX_NE_I16::~Inst_VOP3__V_CMPX_NE_I16() - { - } // ~Inst_VOP3__V_CMPX_NE_I16 - - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_GE_I16::Inst_VOP3__V_CMPX_GE_I16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_ge_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_GE_I16 - - Inst_VOP3__V_CMPX_GE_I16::~Inst_VOP3__V_CMPX_GE_I16() - { - } // ~Inst_VOP3__V_CMPX_GE_I16 - - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_T_I16::Inst_VOP3__V_CMPX_T_I16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_t_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_T_I16 - - Inst_VOP3__V_CMPX_T_I16::~Inst_VOP3__V_CMPX_T_I16() - { - } // ~Inst_VOP3__V_CMPX_T_I16 - - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_F_U16::Inst_VOP3__V_CMPX_F_U16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_f_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_F_U16 - - Inst_VOP3__V_CMPX_F_U16::~Inst_VOP3__V_CMPX_F_U16() - { - } // ~Inst_VOP3__V_CMPX_F_U16 - - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LT_U16::Inst_VOP3__V_CMPX_LT_U16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_lt_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_LT_U16 - - Inst_VOP3__V_CMPX_LT_U16::~Inst_VOP3__V_CMPX_LT_U16() - { - } // ~Inst_VOP3__V_CMPX_LT_U16 - - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_EQ_U16::Inst_VOP3__V_CMPX_EQ_U16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_eq_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_EQ_U16 - - Inst_VOP3__V_CMPX_EQ_U16::~Inst_VOP3__V_CMPX_EQ_U16() - { - } // ~Inst_VOP3__V_CMPX_EQ_U16 - - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LE_U16::Inst_VOP3__V_CMPX_LE_U16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_le_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_LE_U16 - - Inst_VOP3__V_CMPX_LE_U16::~Inst_VOP3__V_CMPX_LE_U16() - { - } // ~Inst_VOP3__V_CMPX_LE_U16 - - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_GT_U16::Inst_VOP3__V_CMPX_GT_U16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_gt_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_GT_U16 - - Inst_VOP3__V_CMPX_GT_U16::~Inst_VOP3__V_CMPX_GT_U16() - { - } // ~Inst_VOP3__V_CMPX_GT_U16 - - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NE_U16::Inst_VOP3__V_CMPX_NE_U16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_ne_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_NE_U16 - - Inst_VOP3__V_CMPX_NE_U16::~Inst_VOP3__V_CMPX_NE_U16() - { - } // ~Inst_VOP3__V_CMPX_NE_U16 - - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_GE_U16::Inst_VOP3__V_CMPX_GE_U16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_ge_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_GE_U16 - - Inst_VOP3__V_CMPX_GE_U16::~Inst_VOP3__V_CMPX_GE_U16() - { - } // ~Inst_VOP3__V_CMPX_GE_U16 - - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_T_U16::Inst_VOP3__V_CMPX_T_U16( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_t_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_T_U16 - - Inst_VOP3__V_CMPX_T_U16::~Inst_VOP3__V_CMPX_T_U16() - { - } // ~Inst_VOP3__V_CMPX_T_U16 - - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
void
Inst_VOP3__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    // "True" compare: unconditionally set the bit for every active lane.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, 1);
        }
    }

    // CMPX variant: the comparison result also replaces the EXEC mask.
    wf->execMask() = sdst.rawData();
    sdst.write();
}

Inst_VOP3__V_CMP_F_I32::Inst_VOP3__V_CMP_F_I32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_f_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_F_I32

Inst_VOP3__V_CMP_F_I32::~Inst_VOP3__V_CMP_F_I32()
{
} // ~Inst_VOP3__V_CMP_F_I32

// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    // "False" compare: unconditionally clear the bit for every active lane.
    // Non-CMPX compare: only SDST is written; EXEC is left untouched.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, 0);
        }
    }

    sdst.write();
}

Inst_VOP3__V_CMP_LT_I32::Inst_VOP3__V_CMP_LT_I32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_lt_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_LT_I32

Inst_VOP3__V_CMP_LT_I32::~Inst_VOP3__V_CMP_LT_I32()
{
} // ~Inst_VOP3__V_CMP_LT_I32

// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
        }
    }

    sdst.write();
}

Inst_VOP3__V_CMP_EQ_I32::Inst_VOP3__V_CMP_EQ_I32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_eq_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_EQ_I32

Inst_VOP3__V_CMP_EQ_I32::~Inst_VOP3__V_CMP_EQ_I32()
{
} // ~Inst_VOP3__V_CMP_EQ_I32

// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
        }
    }

    sdst.write();
}

Inst_VOP3__V_CMP_LE_I32::Inst_VOP3__V_CMP_LE_I32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_le_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_LE_I32

Inst_VOP3__V_CMP_LE_I32::~Inst_VOP3__V_CMP_LE_I32()
{
} // ~Inst_VOP3__V_CMP_LE_I32

// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
        }
    }

    // Non-CMPX compare: only SDST is written; EXEC is left untouched.
    sdst.write();
}

Inst_VOP3__V_CMP_GT_I32::Inst_VOP3__V_CMP_GT_I32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_gt_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_GT_I32

Inst_VOP3__V_CMP_GT_I32::~Inst_VOP3__V_CMP_GT_I32()
{
} // ~Inst_VOP3__V_CMP_GT_I32

// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
        }
    }

    sdst.write();
}

Inst_VOP3__V_CMP_NE_I32::Inst_VOP3__V_CMP_NE_I32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_ne_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_NE_I32

Inst_VOP3__V_CMP_NE_I32::~Inst_VOP3__V_CMP_NE_I32()
{
} // ~Inst_VOP3__V_CMP_NE_I32

// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
        }
    }

    sdst.write();
}

Inst_VOP3__V_CMP_GE_I32::Inst_VOP3__V_CMP_GE_I32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_ge_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_GE_I32

Inst_VOP3__V_CMP_GE_I32::~Inst_VOP3__V_CMP_GE_I32()
{
} // ~Inst_VOP3__V_CMP_GE_I32

// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
        }
    }

    // Non-CMPX compare: only SDST is written; EXEC is left untouched.
    sdst.write();
}

Inst_VOP3__V_CMP_T_I32::Inst_VOP3__V_CMP_T_I32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_t_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_T_I32

Inst_VOP3__V_CMP_T_I32::~Inst_VOP3__V_CMP_T_I32()
{
} // ~Inst_VOP3__V_CMP_T_I32

// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    // "True" compare: unconditionally set the bit for every active lane.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, 1);
        }
    }

    sdst.write();
}

Inst_VOP3__V_CMP_F_U32::Inst_VOP3__V_CMP_F_U32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_f_u32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_F_U32

Inst_VOP3__V_CMP_F_U32::~Inst_VOP3__V_CMP_F_U32()
{
} // ~Inst_VOP3__V_CMP_F_U32

// D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    // "False" compare: unconditionally clear the bit for every active lane.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, 0);
        }
    }

    sdst.write();
}

Inst_VOP3__V_CMP_LT_U32::Inst_VOP3__V_CMP_LT_U32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_lt_u32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_LT_U32

Inst_VOP3__V_CMP_LT_U32::~Inst_VOP3__V_CMP_LT_U32()
{
} // ~Inst_VOP3__V_CMP_LT_U32

// D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
        }
    }

    sdst.write();
}

Inst_VOP3__V_CMP_EQ_U32::Inst_VOP3__V_CMP_EQ_U32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_eq_u32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_EQ_U32

Inst_VOP3__V_CMP_EQ_U32::~Inst_VOP3__V_CMP_EQ_U32()
{
} // ~Inst_VOP3__V_CMP_EQ_U32

// D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
        }
    }

    // Non-CMPX compare: only SDST is written; EXEC is left untouched.
    sdst.write();
}

Inst_VOP3__V_CMP_LE_U32::Inst_VOP3__V_CMP_LE_U32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_le_u32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_LE_U32

Inst_VOP3__V_CMP_LE_U32::~Inst_VOP3__V_CMP_LE_U32()
{
} // ~Inst_VOP3__V_CMP_LE_U32

// D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
        }
    }

    sdst.write();
}

Inst_VOP3__V_CMP_GT_U32::Inst_VOP3__V_CMP_GT_U32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_gt_u32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_GT_U32

Inst_VOP3__V_CMP_GT_U32::~Inst_VOP3__V_CMP_GT_U32()
{
} // ~Inst_VOP3__V_CMP_GT_U32

// D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
        }
    }

    sdst.write();
}

Inst_VOP3__V_CMP_NE_U32::Inst_VOP3__V_CMP_NE_U32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_ne_u32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_NE_U32

Inst_VOP3__V_CMP_NE_U32::~Inst_VOP3__V_CMP_NE_U32()
{
} // ~Inst_VOP3__V_CMP_NE_U32

// D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
        }
    }

    // Non-CMPX compare: only SDST is written; EXEC is left untouched.
    sdst.write();
}

Inst_VOP3__V_CMP_GE_U32::Inst_VOP3__V_CMP_GE_U32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_ge_u32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_GE_U32

Inst_VOP3__V_CMP_GE_U32::~Inst_VOP3__V_CMP_GE_U32()
{
} // ~Inst_VOP3__V_CMP_GE_U32

// D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
        }
    }

    sdst.write();
}

Inst_VOP3__V_CMP_T_U32::Inst_VOP3__V_CMP_T_U32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmp_t_u32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMP_T_U32

Inst_VOP3__V_CMP_T_U32::~Inst_VOP3__V_CMP_T_U32()
{
} // ~Inst_VOP3__V_CMP_T_U32

// D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    // "True" compare: unconditionally set the bit for every active lane.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, 1);
        }
    }

    sdst.write();
}

Inst_VOP3__V_CMPX_F_I32::Inst_VOP3__V_CMPX_F_I32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmpx_f_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMPX_F_I32

Inst_VOP3__V_CMPX_F_I32::~Inst_VOP3__V_CMPX_F_I32()
{
} // ~Inst_VOP3__V_CMPX_F_I32

// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    // "False" compare: unconditionally clear the bit for every active lane.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, 0);
        }
    }

    // CMPX variant: the comparison result also replaces the EXEC mask.
    wf->execMask() = sdst.rawData();
    sdst.write();
}

Inst_VOP3__V_CMPX_LT_I32::Inst_VOP3__V_CMPX_LT_I32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmpx_lt_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMPX_LT_I32

Inst_VOP3__V_CMPX_LT_I32::~Inst_VOP3__V_CMPX_LT_I32()
{
} // ~Inst_VOP3__V_CMPX_LT_I32

// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
        }
    }

    // CMPX variant: the comparison result also replaces the EXEC mask.
    wf->execMask() = sdst.rawData();
    sdst.write();
}

Inst_VOP3__V_CMPX_EQ_I32::Inst_VOP3__V_CMPX_EQ_I32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmpx_eq_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMPX_EQ_I32

Inst_VOP3__V_CMPX_EQ_I32::~Inst_VOP3__V_CMPX_EQ_I32()
{
} // ~Inst_VOP3__V_CMPX_EQ_I32

// EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
        }
    }

    // CMPX variant: the comparison result also replaces the EXEC mask.
    wf->execMask() = sdst.rawData();
    sdst.write();
}

Inst_VOP3__V_CMPX_LE_I32::Inst_VOP3__V_CMPX_LE_I32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmpx_le_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMPX_LE_I32

Inst_VOP3__V_CMPX_LE_I32::~Inst_VOP3__V_CMPX_LE_I32()
{
} // ~Inst_VOP3__V_CMPX_LE_I32

// EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
        }
    }

    // CMPX variant: the comparison result also replaces the EXEC mask.
    wf->execMask() = sdst.rawData();
    sdst.write();
}

Inst_VOP3__V_CMPX_GT_I32::Inst_VOP3__V_CMPX_GT_I32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmpx_gt_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMPX_GT_I32

Inst_VOP3__V_CMPX_GT_I32::~Inst_VOP3__V_CMPX_GT_I32()
{
} // ~Inst_VOP3__V_CMPX_GT_I32

// EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
        }
    }

    // CMPX variant: the comparison result also replaces the EXEC mask.
    wf->execMask() = sdst.rawData();
    sdst.write();
}

Inst_VOP3__V_CMPX_NE_I32::Inst_VOP3__V_CMPX_NE_I32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmpx_ne_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMPX_NE_I32

Inst_VOP3__V_CMPX_NE_I32::~Inst_VOP3__V_CMPX_NE_I32()
{
} // ~Inst_VOP3__V_CMPX_NE_I32

// EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
        }
    }

    // CMPX variant: the comparison result also replaces the EXEC mask.
    wf->execMask() = sdst.rawData();
    sdst.write();
}

Inst_VOP3__V_CMPX_GE_I32::Inst_VOP3__V_CMPX_GE_I32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmpx_ge_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMPX_GE_I32

Inst_VOP3__V_CMPX_GE_I32::~Inst_VOP3__V_CMPX_GE_I32()
{
} // ~Inst_VOP3__V_CMPX_GE_I32

// EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
        }
    }

    // CMPX variant: the comparison result also replaces the EXEC mask.
    wf->execMask() = sdst.rawData();
    sdst.write();
}

Inst_VOP3__V_CMPX_T_I32::Inst_VOP3__V_CMPX_T_I32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmpx_t_i32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMPX_T_I32

Inst_VOP3__V_CMPX_T_I32::~Inst_VOP3__V_CMPX_T_I32()
{
} // ~Inst_VOP3__V_CMPX_T_I32

// EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    // "True" compare: unconditionally set the bit for every active lane.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, 1);
        }
    }

    // CMPX variant: the comparison result also replaces the EXEC mask.
    wf->execMask() = sdst.rawData();
    sdst.write();
}

Inst_VOP3__V_CMPX_F_U32::Inst_VOP3__V_CMPX_F_U32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmpx_f_u32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMPX_F_U32

Inst_VOP3__V_CMPX_F_U32::~Inst_VOP3__V_CMPX_F_U32()
{
} // ~Inst_VOP3__V_CMPX_F_U32

// EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
void
Inst_VOP3__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

    // "False" compare: unconditionally clear the bit for every active lane.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            sdst.setBit(lane, 0);
        }
    }

    // CMPX variant: the comparison result also replaces the EXEC mask.
    wf->execMask() = sdst.rawData();
    sdst.write();
}

Inst_VOP3__V_CMPX_LT_U32::Inst_VOP3__V_CMPX_LT_U32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cmpx_lt_u32", true)
{
    setFlag(ALU);
} // Inst_VOP3__V_CMPX_LT_U32

Inst_VOP3__V_CMPX_LT_U32::~Inst_VOP3__V_CMPX_LT_U32()
{
} // ~Inst_VOP3__V_CMPX_LT_U32

// EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
- void - Inst_VOP3__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_EQ_U32::Inst_VOP3__V_CMPX_EQ_U32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_eq_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_EQ_U32 - - Inst_VOP3__V_CMPX_EQ_U32::~Inst_VOP3__V_CMPX_EQ_U32() - { - } // ~Inst_VOP3__V_CMPX_EQ_U32 - - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LE_U32::Inst_VOP3__V_CMPX_LE_U32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_le_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_LE_U32 - - Inst_VOP3__V_CMPX_LE_U32::~Inst_VOP3__V_CMPX_LE_U32() - { - } // ~Inst_VOP3__V_CMPX_LE_U32 - - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_GT_U32::Inst_VOP3__V_CMPX_GT_U32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_gt_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_GT_U32 - - Inst_VOP3__V_CMPX_GT_U32::~Inst_VOP3__V_CMPX_GT_U32() - { - } // ~Inst_VOP3__V_CMPX_GT_U32 - - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NE_U32::Inst_VOP3__V_CMPX_NE_U32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_ne_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_NE_U32 - - Inst_VOP3__V_CMPX_NE_U32::~Inst_VOP3__V_CMPX_NE_U32() - { - } // ~Inst_VOP3__V_CMPX_NE_U32 - - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_GE_U32::Inst_VOP3__V_CMPX_GE_U32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_ge_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_GE_U32 - - Inst_VOP3__V_CMPX_GE_U32::~Inst_VOP3__V_CMPX_GE_U32() - { - } // ~Inst_VOP3__V_CMPX_GE_U32 - - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_T_U32::Inst_VOP3__V_CMPX_T_U32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_t_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_T_U32 - - Inst_VOP3__V_CMPX_T_U32::~Inst_VOP3__V_CMPX_T_U32() - { - } // ~Inst_VOP3__V_CMPX_T_U32 - - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMP_F_I64::Inst_VOP3__V_CMP_F_I64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_f_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_I64 - - Inst_VOP3__V_CMP_F_I64::~Inst_VOP3__V_CMP_F_I64() - { - } // ~Inst_VOP3__V_CMP_F_I64 - - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_LT_I64::Inst_VOP3__V_CMP_LT_I64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_lt_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_I64 - - Inst_VOP3__V_CMP_LT_I64::~Inst_VOP3__V_CMP_LT_I64() - { - } // ~Inst_VOP3__V_CMP_LT_I64 - - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_EQ_I64::Inst_VOP3__V_CMP_EQ_I64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_eq_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_I64 - - Inst_VOP3__V_CMP_EQ_I64::~Inst_VOP3__V_CMP_EQ_I64() - { - } // ~Inst_VOP3__V_CMP_EQ_I64 - - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_LE_I64::Inst_VOP3__V_CMP_LE_I64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_le_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_I64 - - Inst_VOP3__V_CMP_LE_I64::~Inst_VOP3__V_CMP_LE_I64() - { - } // ~Inst_VOP3__V_CMP_LE_I64 - - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_GT_I64::Inst_VOP3__V_CMP_GT_I64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_gt_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_I64 - - Inst_VOP3__V_CMP_GT_I64::~Inst_VOP3__V_CMP_GT_I64() - { - } // ~Inst_VOP3__V_CMP_GT_I64 - - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NE_I64::Inst_VOP3__V_CMP_NE_I64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_ne_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_I64 - - Inst_VOP3__V_CMP_NE_I64::~Inst_VOP3__V_CMP_NE_I64() - { - } // ~Inst_VOP3__V_CMP_NE_I64 - - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_GE_I64::Inst_VOP3__V_CMP_GE_I64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_ge_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_I64 - - Inst_VOP3__V_CMP_GE_I64::~Inst_VOP3__V_CMP_GE_I64() - { - } // ~Inst_VOP3__V_CMP_GE_I64 - - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_T_I64::Inst_VOP3__V_CMP_T_I64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_t_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_I64 - - Inst_VOP3__V_CMP_T_I64::~Inst_VOP3__V_CMP_T_I64() - { - } // ~Inst_VOP3__V_CMP_T_I64 - - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_F_U64::Inst_VOP3__V_CMP_F_U64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_f_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_U64 - - Inst_VOP3__V_CMP_F_U64::~Inst_VOP3__V_CMP_F_U64() - { - } // ~Inst_VOP3__V_CMP_F_U64 - - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_LT_U64::Inst_VOP3__V_CMP_LT_U64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_lt_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_U64 - - Inst_VOP3__V_CMP_LT_U64::~Inst_VOP3__V_CMP_LT_U64() - { - } // ~Inst_VOP3__V_CMP_LT_U64 - - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_EQ_U64::Inst_VOP3__V_CMP_EQ_U64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_eq_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_U64 - - Inst_VOP3__V_CMP_EQ_U64::~Inst_VOP3__V_CMP_EQ_U64() - { - } // ~Inst_VOP3__V_CMP_EQ_U64 - - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_LE_U64::Inst_VOP3__V_CMP_LE_U64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_le_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_U64 - - Inst_VOP3__V_CMP_LE_U64::~Inst_VOP3__V_CMP_LE_U64() - { - } // ~Inst_VOP3__V_CMP_LE_U64 - - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_GT_U64::Inst_VOP3__V_CMP_GT_U64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_gt_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_U64 - - Inst_VOP3__V_CMP_GT_U64::~Inst_VOP3__V_CMP_GT_U64() - { - } // ~Inst_VOP3__V_CMP_GT_U64 - - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_NE_U64::Inst_VOP3__V_CMP_NE_U64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_ne_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_U64 - - Inst_VOP3__V_CMP_NE_U64::~Inst_VOP3__V_CMP_NE_U64() - { - } // ~Inst_VOP3__V_CMP_NE_U64 - - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_GE_U64::Inst_VOP3__V_CMP_GE_U64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_ge_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_U64 - - Inst_VOP3__V_CMP_GE_U64::~Inst_VOP3__V_CMP_GE_U64() - { - } // ~Inst_VOP3__V_CMP_GE_U64 - - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMP_T_U64::Inst_VOP3__V_CMP_T_U64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmp_t_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_U64 - - Inst_VOP3__V_CMP_T_U64::~Inst_VOP3__V_CMP_T_U64() - { - } // ~Inst_VOP3__V_CMP_T_U64 - - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } - - Inst_VOP3__V_CMPX_F_I64::Inst_VOP3__V_CMPX_F_I64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_f_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_F_I64 - - Inst_VOP3__V_CMPX_F_I64::~Inst_VOP3__V_CMPX_F_I64() - { - } // ~Inst_VOP3__V_CMPX_F_I64 - - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LT_I64::Inst_VOP3__V_CMPX_LT_I64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_lt_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_LT_I64 - - Inst_VOP3__V_CMPX_LT_I64::~Inst_VOP3__V_CMPX_LT_I64() - { - } // ~Inst_VOP3__V_CMPX_LT_I64 - - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_EQ_I64::Inst_VOP3__V_CMPX_EQ_I64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_eq_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_EQ_I64 - - Inst_VOP3__V_CMPX_EQ_I64::~Inst_VOP3__V_CMPX_EQ_I64() - { - } // ~Inst_VOP3__V_CMPX_EQ_I64 - - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LE_I64::Inst_VOP3__V_CMPX_LE_I64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_le_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_LE_I64 - - Inst_VOP3__V_CMPX_LE_I64::~Inst_VOP3__V_CMPX_LE_I64() - { - } // ~Inst_VOP3__V_CMPX_LE_I64 - - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_GT_I64::Inst_VOP3__V_CMPX_GT_I64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_gt_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_GT_I64 - - Inst_VOP3__V_CMPX_GT_I64::~Inst_VOP3__V_CMPX_GT_I64() - { - } // ~Inst_VOP3__V_CMPX_GT_I64 - - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NE_I64::Inst_VOP3__V_CMPX_NE_I64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_ne_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_NE_I64 - - Inst_VOP3__V_CMPX_NE_I64::~Inst_VOP3__V_CMPX_NE_I64() - { - } // ~Inst_VOP3__V_CMPX_NE_I64 - - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_GE_I64::Inst_VOP3__V_CMPX_GE_I64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_ge_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_GE_I64 - - Inst_VOP3__V_CMPX_GE_I64::~Inst_VOP3__V_CMPX_GE_I64() - { - } // ~Inst_VOP3__V_CMPX_GE_I64 - - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_T_I64::Inst_VOP3__V_CMPX_T_I64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_t_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_T_I64 - - Inst_VOP3__V_CMPX_T_I64::~Inst_VOP3__V_CMPX_T_I64() - { - } // ~Inst_VOP3__V_CMPX_T_I64 - - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_F_U64::Inst_VOP3__V_CMPX_F_U64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_f_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_F_U64 - - Inst_VOP3__V_CMPX_F_U64::~Inst_VOP3__V_CMPX_F_U64() - { - } // ~Inst_VOP3__V_CMPX_F_U64 - - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LT_U64::Inst_VOP3__V_CMPX_LT_U64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_lt_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_LT_U64 - - Inst_VOP3__V_CMPX_LT_U64::~Inst_VOP3__V_CMPX_LT_U64() - { - } // ~Inst_VOP3__V_CMPX_LT_U64 - - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_EQ_U64::Inst_VOP3__V_CMPX_EQ_U64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_eq_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_EQ_U64 - - Inst_VOP3__V_CMPX_EQ_U64::~Inst_VOP3__V_CMPX_EQ_U64() - { - } // ~Inst_VOP3__V_CMPX_EQ_U64 - - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_LE_U64::Inst_VOP3__V_CMPX_LE_U64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_le_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_LE_U64 - - Inst_VOP3__V_CMPX_LE_U64::~Inst_VOP3__V_CMPX_LE_U64() - { - } // ~Inst_VOP3__V_CMPX_LE_U64 - - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_GT_U64::Inst_VOP3__V_CMPX_GT_U64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_gt_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_GT_U64 - - Inst_VOP3__V_CMPX_GT_U64::~Inst_VOP3__V_CMPX_GT_U64() - { - } // ~Inst_VOP3__V_CMPX_GT_U64 - - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_NE_U64::Inst_VOP3__V_CMPX_NE_U64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_ne_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_NE_U64 - - Inst_VOP3__V_CMPX_NE_U64::~Inst_VOP3__V_CMPX_NE_U64() - { - } // ~Inst_VOP3__V_CMPX_NE_U64 - - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_GE_U64::Inst_VOP3__V_CMPX_GE_U64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_ge_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_GE_U64 - - Inst_VOP3__V_CMPX_GE_U64::~Inst_VOP3__V_CMPX_GE_U64() - { - } // ~Inst_VOP3__V_CMPX_GE_U64 - - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CMPX_T_U64::Inst_VOP3__V_CMPX_T_U64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cmpx_t_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMPX_T_U64 - - Inst_VOP3__V_CMPX_T_U64::~Inst_VOP3__V_CMPX_T_U64() - { - } // ~Inst_VOP3__V_CMPX_T_U64 - - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } - - Inst_VOP3__V_CNDMASK_B32::Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cndmask_b32", false) - { - setFlag(ALU); - setFlag(ReadsVCC); - } // Inst_VOP3__V_CNDMASK_B32 - - Inst_VOP3__V_CNDMASK_B32::~Inst_VOP3__V_CNDMASK_B32() - { - } // ~Inst_VOP3__V_CNDMASK_B32 - - // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC - // as a scalar GPR in S2. - void - Inst_VOP3__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - vcc.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = bits(vcc.rawData(), lane) - ? 
src1[lane] : src0[lane]; - } - } - - vdst.write(); - } - - Inst_VOP3__V_ADD_F32::Inst_VOP3__V_ADD_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_add_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_ADD_F32 - - Inst_VOP3__V_ADD_F32::~Inst_VOP3__V_ADD_F32() - { - } // ~Inst_VOP3__V_ADD_F32 - - // D.f = S0.f + S1.f. - void - Inst_VOP3__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - } - } - - vdst.write(); - } - - Inst_VOP3__V_SUB_F32::Inst_VOP3__V_SUB_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_sub_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_SUB_F32 - - Inst_VOP3__V_SUB_F32::~Inst_VOP3__V_SUB_F32() - { - } // ~Inst_VOP3__V_SUB_F32 - - // D.f = S0.f - S1.f. 
- void - Inst_VOP3__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - } - } - - vdst.write(); - } - - Inst_VOP3__V_SUBREV_F32::Inst_VOP3__V_SUBREV_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_subrev_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_SUBREV_F32 - - Inst_VOP3__V_SUBREV_F32::~Inst_VOP3__V_SUBREV_F32() - { - } // ~Inst_VOP3__V_SUBREV_F32 - - // D.f = S1.f - S0.f. 
    void
    Inst_VOP3__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        // Apply per-source FP input modifiers (abs/neg) before computing.
        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        // Reverse subtract: operand order is S1 - S0.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_MUL_LEGACY_F32::Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_legacy_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MUL_LEGACY_F32

    Inst_VOP3__V_MUL_LEGACY_F32::~Inst_VOP3__V_MUL_LEGACY_F32()
    {
    } // ~Inst_VOP3__V_MUL_LEGACY_F32

    // D.f = S0.f * S1.f
    void
    Inst_VOP3__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        // Apply per-source FP input modifiers (abs/neg) before computing.
        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        // "Legacy" multiply: the special cases below implement DX9-style
        // semantics where 0 * anything (including infinity) yields a signed
        // zero, rather than IEEE-754 NaN propagation. The branch order
        // matters: NaN inputs first, then zero/denormal S0, then infinite
        // S0, finally the ordinary product.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    // NaN in either operand propagates.
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    // +0/+denorm times infinity is NaN; otherwise a zero
                    // whose sign follows S1.
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    // -0/-denorm: same as above with the sign flipped.
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    // +inf times zero/denorm is NaN; otherwise an infinity
                    // whose sign follows S1.
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    // -inf: same as above with the sign flipped.
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    // No special case: ordinary IEEE multiply.
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_MUL_F32::Inst_VOP3__V_MUL_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MUL_F32

    Inst_VOP3__V_MUL_F32::~Inst_VOP3__V_MUL_F32()
    {
    } // ~Inst_VOP3__V_MUL_F32

    // D.f = S0.f * S1.f.
- void - Inst_VOP3__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isnan(src0[lane]) || - std::isnan(src1[lane])) { - vdst[lane] = NAN; - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - !std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if (std::isinf(src0[lane]) && - !std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - } - } else if (std::isinf(src0[lane]) && - std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - } - } 
else { - vdst[lane] = src0[lane] * src1[lane]; - } - } - } - - vdst.write(); - } - - Inst_VOP3__V_MUL_I32_I24::Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_mul_i32_i24", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_I32_I24 - - Inst_VOP3__V_MUL_I32_I24::~Inst_VOP3__V_MUL_I32_I24() - { - } // ~Inst_VOP3__V_MUL_I32_I24 - - // D.i = S0.i[23:0] * S1.i[23:0]. - void - Inst_VOP3__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = szext<24>(src0[lane]) * szext<24>(src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_MUL_HI_I32_I24::Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_mul_hi_i32_i24", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_HI_I32_I24 - - Inst_VOP3__V_MUL_HI_I32_I24::~Inst_VOP3__V_MUL_HI_I32_I24() - { - } // ~Inst_VOP3__V_MUL_HI_I32_I24 - - // D.i = (S0.i[23:0] * S1.i[23:0]) >> 32. 
- void - Inst_VOP3__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI64 tmp_src0 = (VecElemI64)szext<24>(src0[lane]); - VecElemI64 tmp_src1 = (VecElemI64)szext<24>(src1[lane]); - - vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32); - } - } - - vdst.write(); - } - - Inst_VOP3__V_MUL_U32_U24::Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_mul_u32_u24", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_U32_U24 - - Inst_VOP3__V_MUL_U32_U24::~Inst_VOP3__V_MUL_U32_U24() - { - } // ~Inst_VOP3__V_MUL_U32_U24 - - // D.u = S0.u[23:0] * S1.u[23:0]. 
- void - Inst_VOP3__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0); - } - } - - vdst.write(); - } - - Inst_VOP3__V_MUL_HI_U32_U24::Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_mul_hi_u32_u24", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_HI_U32_U24 - - Inst_VOP3__V_MUL_HI_U32_U24::~Inst_VOP3__V_MUL_HI_U32_U24() - { - } // ~Inst_VOP3__V_MUL_HI_U32_U24 - - // D.i = (S0.u[23:0] * S1.u[23:0]) >> 32. 
- void - Inst_VOP3__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); - VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0); - vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32); - } - } - - vdst.write(); - } - - Inst_VOP3__V_MIN_F32::Inst_VOP3__V_MIN_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_min_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MIN_F32 - - Inst_VOP3__V_MIN_F32::~Inst_VOP3__V_MIN_F32() - { - } // ~Inst_VOP3__V_MIN_F32 - - // D.f = (S0.f < S1.f ? S0.f : S1.f). 
- void - Inst_VOP3__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fmin(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_MAX_F32::Inst_VOP3__V_MAX_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_max_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MAX_F32 - - Inst_VOP3__V_MAX_F32::~Inst_VOP3__V_MAX_F32() - { - } // ~Inst_VOP3__V_MAX_F32 - - // D.f = (S0.f >= S1.f ? S0.f : S1.f). 
- void - Inst_VOP3__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fmax(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_MIN_I32::Inst_VOP3__V_MIN_I32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_min_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MIN_I32 - - Inst_VOP3__V_MIN_I32::~Inst_VOP3__V_MIN_I32() - { - } // ~Inst_VOP3__V_MIN_I32 - - // D.i = min(S0.i, S1.i). 
- void - Inst_VOP3__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_MAX_I32::Inst_VOP3__V_MAX_I32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_max_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX_I32 - - Inst_VOP3__V_MAX_I32::~Inst_VOP3__V_MAX_I32() - { - } // ~Inst_VOP3__V_MAX_I32 - - // D.i = max(S0.i, S1.i). 
- void - Inst_VOP3__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_MIN_U32::Inst_VOP3__V_MIN_U32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_min_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MIN_U32 - - Inst_VOP3__V_MIN_U32::~Inst_VOP3__V_MIN_U32() - { - } // ~Inst_VOP3__V_MIN_U32 - - // D.u = min(S0.u, S1.u). 
- void - Inst_VOP3__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_MAX_U32::Inst_VOP3__V_MAX_U32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_max_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX_U32 - - Inst_VOP3__V_MAX_U32::~Inst_VOP3__V_MAX_U32() - { - } // ~Inst_VOP3__V_MAX_U32 - - // D.u = max(S0.u, S1.u). 
- void - Inst_VOP3__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_LSHRREV_B32::Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_lshrrev_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHRREV_B32 - - Inst_VOP3__V_LSHRREV_B32::~Inst_VOP3__V_LSHRREV_B32() - { - } // ~Inst_VOP3__V_LSHRREV_B32 - - // D.u = S1.u >> S0.u[4:0]. - // The vacated bits are set to zero. 
- void - Inst_VOP3__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); - } - } - - vdst.write(); - } - - Inst_VOP3__V_ASHRREV_I32::Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_ashrrev_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ASHRREV_I32 - - Inst_VOP3__V_ASHRREV_I32::~Inst_VOP3__V_ASHRREV_I32() - { - } // ~Inst_VOP3__V_ASHRREV_I32 - - // D.i = signext(S1.i) >> S0.i[4:0]. - // The vacated bits are set to the sign bit of the input value. 
- void - Inst_VOP3__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); - } - } - - vdst.write(); - } - - Inst_VOP3__V_LSHLREV_B32::Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_lshlrev_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHLREV_B32 - - Inst_VOP3__V_LSHLREV_B32::~Inst_VOP3__V_LSHLREV_B32() - { - } // ~Inst_VOP3__V_LSHLREV_B32 - - // D.u = S1.u << S0.u[4:0]. 
- void - Inst_VOP3__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] << bits(src0[lane], 4, 0); - } - } - - vdst.write(); - } - - Inst_VOP3__V_AND_B32::Inst_VOP3__V_AND_B32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_and_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_AND_B32 - - Inst_VOP3__V_AND_B32::~Inst_VOP3__V_AND_B32() - { - } // ~Inst_VOP3__V_AND_B32 - - // D.u = S0.u & S1.u. - // Input and output modifiers not supported. 
- void - Inst_VOP3__V_AND_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] & src1[lane]; - } - } - - vdst.write(); - } - - Inst_VOP3__V_OR_B32::Inst_VOP3__V_OR_B32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_or_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_OR_B32 - - Inst_VOP3__V_OR_B32::~Inst_VOP3__V_OR_B32() - { - } // ~Inst_VOP3__V_OR_B32 - - // D.u = S0.u | S1.u. - // Input and output modifiers not supported. 
// D.u = S0.u | S1.u.
// Input and output modifiers not supported.
void
Inst_VOP3__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    // Bitwise OR, applied only to lanes enabled in the EXEC mask.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = src0[lane] | src1[lane];
        }
    }

    vdst.write();
}

Inst_VOP3__V_XOR_B32::Inst_VOP3__V_XOR_B32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_xor_b32", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_XOR_B32

Inst_VOP3__V_XOR_B32::~Inst_VOP3__V_XOR_B32()
{
} // ~Inst_VOP3__V_XOR_B32

// D.u = S0.u ^ S1.u.
// Input and output modifiers not supported.
void
Inst_VOP3__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    // Bitwise XOR, applied only to lanes enabled in the EXEC mask.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = src0[lane] ^ src1[lane];
        }
    }

    vdst.write();
}

Inst_VOP3__V_MAC_F32::Inst_VOP3__V_MAC_F32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_mac_f32", false)
{
    setFlag(ALU);
    setFlag(F32);
    setFlag(MAC);
} // Inst_VOP3__V_MAC_F32

Inst_VOP3__V_MAC_F32::~Inst_VOP3__V_MAC_F32()
{
} // ~Inst_VOP3__V_MAC_F32

// D.f = S0.f * S1.f + D.f.
void
Inst_VOP3__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
    VecOperandF32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();
    // MAC accumulates into the destination, so the old value is read first.
    vdst.read();

    // FP operation: ABS/NEG input modifiers on src0/src1 are honored.
    if (instData.ABS & 0x1) {
        src0.absModifier();
    }

    if (instData.ABS & 0x2) {
        src1.absModifier();
    }

    if (extData.NEG & 0x1) {
        src0.negModifier();
    }

    if (extData.NEG & 0x2) {
        src1.negModifier();
    }

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x4));

    // std::fma gives a fused multiply-add (single rounding).
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
        }
    }

    vdst.write();
}

Inst_VOP3__V_ADD_U32::Inst_VOP3__V_ADD_U32(InFmt_VOP3_SDST_ENC *iFmt)
    : Inst_VOP3_SDST_ENC(iFmt, "v_add_u32")
{
    setFlag(ALU);
    setFlag(WritesVCC);
} // Inst_VOP3__V_ADD_U32

Inst_VOP3__V_ADD_U32::~Inst_VOP3__V_ADD_U32()
{
} // ~Inst_VOP3__V_ADD_U32

// D.u = S0.u + S1.u;
// VCC[threadId] = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an UNSIGNED
// overflow or carry-out.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
void
Inst_VOP3__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);
    // VOP3_SDST_ENC: the carry-out goes to an arbitrary SGPR pair (SDST),
    // not necessarily the architectural VCC.
    ScalarOperandU64 vcc(gpuDynInst, instData.SDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = src0[lane] + src1[lane];
            // Detect 32-bit carry-out by doing the add in 64 bits.
            vcc.setBit(lane, ((VecElemU64)src0[lane]
                + (VecElemU64)src1[lane]) >= 0x100000000ULL ? 1 : 0);
        }
    }

    vdst.write();
    vcc.write();
}

Inst_VOP3__V_SUB_U32::Inst_VOP3__V_SUB_U32(InFmt_VOP3_SDST_ENC *iFmt)
    : Inst_VOP3_SDST_ENC(iFmt, "v_sub_u32")
{
    setFlag(ALU);
    setFlag(WritesVCC);
} // Inst_VOP3__V_SUB_U32

Inst_VOP3__V_SUB_U32::~Inst_VOP3__V_SUB_U32()
{
} // ~Inst_VOP3__V_SUB_U32

// D.u = S0.u - S1.u;
// VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
// carry-out.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
void
Inst_VOP3__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);
    ScalarOperandU64 vcc(gpuDynInst, instData.SDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = src0[lane] - src1[lane];
            // Borrow-out: set when the subtrahend exceeds the minuend.
            vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
        }
    }

    vdst.write();
    vcc.write();
}

Inst_VOP3__V_SUBREV_U32::Inst_VOP3__V_SUBREV_U32(
    InFmt_VOP3_SDST_ENC *iFmt)
    : Inst_VOP3_SDST_ENC(iFmt, "v_subrev_u32")
{
    setFlag(ALU);
    setFlag(WritesVCC);
} // Inst_VOP3__V_SUBREV_U32

Inst_VOP3__V_SUBREV_U32::~Inst_VOP3__V_SUBREV_U32()
{
} // ~Inst_VOP3__V_SUBREV_U32

// D.u = S1.u - S0.u;
// VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
// carry-out.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair.
void
Inst_VOP3__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);
    ScalarOperandU64 vcc(gpuDynInst, instData.SDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    // Reversed operand order relative to v_sub_u32: S1 - S0.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = src1[lane] - src0[lane];
            vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
        }
    }

    vdst.write();
    vcc.write();
}

Inst_VOP3__V_ADDC_U32::Inst_VOP3__V_ADDC_U32(InFmt_VOP3_SDST_ENC *iFmt)
    : Inst_VOP3_SDST_ENC(iFmt, "v_addc_u32")
{
    setFlag(ALU);
    setFlag(WritesVCC);
    setFlag(ReadsVCC);
} // Inst_VOP3__V_ADDC_U32

Inst_VOP3__V_ADDC_U32::~Inst_VOP3__V_ADDC_U32()
{
} // ~Inst_VOP3__V_ADDC_U32

// D.u = S0.u + S1.u + VCC[threadId];
// VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0)
// is an UNSIGNED overflow.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
// source comes from the SGPR-pair at S2.u.
void
Inst_VOP3__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    // Carry-in: SGPR pair addressed by SRC2 (read-only here).
    ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);
    // Carry-out: SGPR pair addressed by SDST (written below).
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

    src0.readSrc();
    src1.readSrc();
    vcc.read();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // bits(vcc.rawData(), lane) extracts this lane's carry-in bit.
            vdst[lane] = src0[lane] + src1[lane]
                + bits(vcc.rawData(), lane);
            // Carry-out computed with the full 64-bit sum.
            sdst.setBit(lane, ((VecElemU64)src0[lane]
                + (VecElemU64)src1[lane]
                + (VecElemU64)bits(vcc.rawData(), lane))
                >= 0x100000000 ? 1 : 0);
        }
    }

    vdst.write();
    sdst.write();
}

Inst_VOP3__V_SUBB_U32::Inst_VOP3__V_SUBB_U32(InFmt_VOP3_SDST_ENC *iFmt)
    : Inst_VOP3_SDST_ENC(iFmt, "v_subb_u32")
{
    setFlag(ALU);
    setFlag(WritesVCC);
    setFlag(ReadsVCC);
} // Inst_VOP3__V_SUBB_U32

Inst_VOP3__V_SUBB_U32::~Inst_VOP3__V_SUBB_U32()
{
} // ~Inst_VOP3__V_SUBB_U32

// D.u = S0.u - S1.u - VCC[threadId];
// VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
// overflow.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
// source comes from the SGPR-pair at S2.u.
void
Inst_VOP3__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    // Borrow-in comes from SRC2; borrow-out is written to SDST.
    ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
    ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();
    vcc.read();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = src0[lane] - src1[lane]
                - bits(vcc.rawData(), lane);
            sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
                > src0[lane] ? 1 : 0);
        }
    }

    vdst.write();
    sdst.write();
}

Inst_VOP3__V_SUBBREV_U32::Inst_VOP3__V_SUBBREV_U32(
    InFmt_VOP3_SDST_ENC *iFmt)
    : Inst_VOP3_SDST_ENC(iFmt, "v_subbrev_u32")
{
    setFlag(ALU);
    setFlag(WritesVCC);
    setFlag(ReadsVCC);
} // Inst_VOP3__V_SUBBREV_U32

Inst_VOP3__V_SUBBREV_U32::~Inst_VOP3__V_SUBBREV_U32()
{
} // ~Inst_VOP3__V_SUBBREV_U32

// D.u = S1.u - S0.u - VCC[threadId];
// VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
// overflow.
// In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
// source comes from the SGPR-pair at S2.u.
- void - Inst_VOP3__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU64 vcc(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - vcc.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane] - - bits(vcc.rawData(), lane); - sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) - > src0[lane] ? 1 : 0); - } - } - - vdst.write(); - sdst.write(); - } - - Inst_VOP3__V_ADD_F16::Inst_VOP3__V_ADD_F16(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_add_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_ADD_F16 - - Inst_VOP3__V_ADD_F16::~Inst_VOP3__V_ADD_F16() - { - } // ~Inst_VOP3__V_ADD_F16 - - // D.f16 = S0.f16 + S1.f16. - void - Inst_VOP3__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP3__V_SUB_F16::Inst_VOP3__V_SUB_F16(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_sub_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_SUB_F16 - - Inst_VOP3__V_SUB_F16::~Inst_VOP3__V_SUB_F16() - { - } // ~Inst_VOP3__V_SUB_F16 - - // D.f16 = S0.f16 - S1.f16. - void - Inst_VOP3__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP3__V_SUBREV_F16::Inst_VOP3__V_SUBREV_F16(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_subrev_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_SUBREV_F16 - - Inst_VOP3__V_SUBREV_F16::~Inst_VOP3__V_SUBREV_F16() - { - } // ~Inst_VOP3__V_SUBREV_F16 - - // D.f16 = S1.f16 - S0.f16. 
void
Inst_VOP3__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
{
    // F16 arithmetic is not implemented in this ISA model.
    panicUnimplemented();
}

Inst_VOP3__V_MUL_F16::Inst_VOP3__V_MUL_F16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_mul_f16", false)
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP3__V_MUL_F16

Inst_VOP3__V_MUL_F16::~Inst_VOP3__V_MUL_F16()
{
} // ~Inst_VOP3__V_MUL_F16

// D.f16 = S0.f16 * S1.f16.
void
Inst_VOP3__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
{
    // F16 arithmetic is not implemented in this ISA model.
    panicUnimplemented();
}

Inst_VOP3__V_MAC_F16::Inst_VOP3__V_MAC_F16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_mac_f16", false)
{
    setFlag(ALU);
    setFlag(F16);
    setFlag(MAC);
} // Inst_VOP3__V_MAC_F16

Inst_VOP3__V_MAC_F16::~Inst_VOP3__V_MAC_F16()
{
} // ~Inst_VOP3__V_MAC_F16

// D.f16 = S0.f16 * S1.f16 + D.f16.
void
Inst_VOP3__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst)
{
    // F16 arithmetic is not implemented in this ISA model.
    panicUnimplemented();
}

Inst_VOP3__V_ADD_U16::Inst_VOP3__V_ADD_U16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_add_u16", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_ADD_U16

Inst_VOP3__V_ADD_U16::~Inst_VOP3__V_ADD_U16()
{
} // ~Inst_VOP3__V_ADD_U16

// D.u16 = S0.u16 + S1.u16.
void
Inst_VOP3__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
    VecOperandU16 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = src0[lane] + src1[lane];
        }
    }

    vdst.write();
}

Inst_VOP3__V_SUB_U16::Inst_VOP3__V_SUB_U16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_sub_u16", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_SUB_U16

Inst_VOP3__V_SUB_U16::~Inst_VOP3__V_SUB_U16()
{
} // ~Inst_VOP3__V_SUB_U16

// D.u16 = S0.u16 - S1.u16.
void
Inst_VOP3__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
    VecOperandU16 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = src0[lane] - src1[lane];
        }
    }

    vdst.write();
}

Inst_VOP3__V_SUBREV_U16::Inst_VOP3__V_SUBREV_U16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_subrev_u16", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_SUBREV_U16

Inst_VOP3__V_SUBREV_U16::~Inst_VOP3__V_SUBREV_U16()
{
} // ~Inst_VOP3__V_SUBREV_U16

// D.u16 = S1.u16 - S0.u16.
void
Inst_VOP3__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
    VecOperandU16 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    // Reversed operand order: S1 - S0.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = src1[lane] - src0[lane];
        }
    }

    vdst.write();
}

Inst_VOP3__V_MUL_LO_U16::Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_mul_lo_u16", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_MUL_LO_U16

Inst_VOP3__V_MUL_LO_U16::~Inst_VOP3__V_MUL_LO_U16()
{
} // ~Inst_VOP3__V_MUL_LO_U16

// D.u16 = S0.u16 * S1.u16.
void
Inst_VOP3__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
    VecOperandU16 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    // 16-bit multiply; the truncation to the low 16 bits happens on
    // assignment to the U16 destination element.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = src0[lane] * src1[lane];
        }
    }

    vdst.write();
}

Inst_VOP3__V_LSHLREV_B16::Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_lshlrev_b16", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_LSHLREV_B16

Inst_VOP3__V_LSHLREV_B16::~Inst_VOP3__V_LSHLREV_B16()
{
} // ~Inst_VOP3__V_LSHLREV_B16

// D.u[15:0] = S1.u[15:0] << S0.u[3:0].
void
Inst_VOP3__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
    ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
    VecOperandU16 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    // "rev" encoding: shift amount comes from S0 (low 4 bits), value
    // from S1.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
        }
    }

    vdst.write();
}

Inst_VOP3__V_LSHRREV_B16::Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_lshrrev_b16", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_LSHRREV_B16

Inst_VOP3__V_LSHRREV_B16::~Inst_VOP3__V_LSHRREV_B16()
{
} // ~Inst_VOP3__V_LSHRREV_B16

// D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
// The vacated bits are set to zero.
void
Inst_VOP3__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
    ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
    VecOperandU16 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    // NOTE(review): unlike v_lshlrev_b16 (which asserts that ABS/NEG are
    // clear), this untyped shift honors the FP input modifiers on its
    // integer operands. Confirm whether this asymmetry is intentional.
    if (instData.ABS & 0x1) {
        src0.absModifier();
    }

    if (instData.ABS & 0x2) {
        src1.absModifier();
    }

    if (extData.NEG & 0x1) {
        src0.negModifier();
    }

    if (extData.NEG & 0x2) {
        src1.negModifier();
    }

    // Logical shift right: S1 shifted by the low 4 bits of S0.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
        }
    }

    vdst.write();
}

Inst_VOP3__V_ASHRREV_I16::Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_ashrrev_i16", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_ASHRREV_I16

Inst_VOP3__V_ASHRREV_I16::~Inst_VOP3__V_ASHRREV_I16()
{
} // ~Inst_VOP3__V_ASHRREV_I16

// D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
// The vacated bits are set to the sign bit of the input value.
void
Inst_VOP3__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
    VecOperandI16 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    // Arithmetic shift: src1 is a signed I16, so >> sign-extends.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
        }
    }

    vdst.write();
}

Inst_VOP3__V_MAX_F16::Inst_VOP3__V_MAX_F16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_max_f16", false)
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP3__V_MAX_F16

Inst_VOP3__V_MAX_F16::~Inst_VOP3__V_MAX_F16()
{
} // ~Inst_VOP3__V_MAX_F16

// D.f16 = max(S0.f16, S1.f16).
void
Inst_VOP3__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
{
    // F16 arithmetic is not implemented in this ISA model.
    panicUnimplemented();
}

Inst_VOP3__V_MIN_F16::Inst_VOP3__V_MIN_F16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_min_f16", false)
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP3__V_MIN_F16

Inst_VOP3__V_MIN_F16::~Inst_VOP3__V_MIN_F16()
{
} // ~Inst_VOP3__V_MIN_F16

// D.f16 = min(S0.f16, S1.f16).
void
Inst_VOP3__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
{
    // F16 arithmetic is not implemented in this ISA model.
    panicUnimplemented();
}

Inst_VOP3__V_MAX_U16::Inst_VOP3__V_MAX_U16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_max_u16", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_MAX_U16

Inst_VOP3__V_MAX_U16::~Inst_VOP3__V_MAX_U16()
{
} // ~Inst_VOP3__V_MAX_U16

// D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
void
Inst_VOP3__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
    VecOperandU16 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    // NOTE(review): ABS/NEG modifiers are honored here on integer
    // operands, while most integer ops in this file assert them clear.
    // Confirm this is intended.
    if (instData.ABS & 0x1) {
        src0.absModifier();
    }

    if (instData.ABS & 0x2) {
        src1.absModifier();
    }

    if (extData.NEG & 0x1) {
        src0.negModifier();
    }

    if (extData.NEG & 0x2) {
        src1.negModifier();
    }

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = std::max(src0[lane], src1[lane]);
        }
    }

    vdst.write();
}

Inst_VOP3__V_MAX_I16::Inst_VOP3__V_MAX_I16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_max_i16", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_MAX_I16

Inst_VOP3__V_MAX_I16::~Inst_VOP3__V_MAX_I16()
{
} // ~Inst_VOP3__V_MAX_I16

// D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
void
Inst_VOP3__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
    VecOperandI16 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    if (instData.ABS & 0x1) {
        src0.absModifier();
    }

    if (instData.ABS & 0x2) {
        src1.absModifier();
    }

    if (extData.NEG & 0x1) {
        src0.negModifier();
    }

    if (extData.NEG & 0x2) {
        src1.negModifier();
    }

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = std::max(src0[lane], src1[lane]);
        }
    }

    vdst.write();
}

Inst_VOP3__V_MIN_U16::Inst_VOP3__V_MIN_U16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_min_u16", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_MIN_U16

Inst_VOP3__V_MIN_U16::~Inst_VOP3__V_MIN_U16()
{
} // ~Inst_VOP3__V_MIN_U16

// D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
void
Inst_VOP3__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
    VecOperandU16 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    if (instData.ABS & 0x1) {
        src0.absModifier();
    }

    if (instData.ABS & 0x2) {
        src1.absModifier();
    }

    if (extData.NEG & 0x1) {
        src0.negModifier();
    }

    if (extData.NEG & 0x2) {
        src1.negModifier();
    }

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = std::min(src0[lane], src1[lane]);
        }
    }

    vdst.write();
}

Inst_VOP3__V_MIN_I16::Inst_VOP3__V_MIN_I16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_min_i16", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_MIN_I16

Inst_VOP3__V_MIN_I16::~Inst_VOP3__V_MIN_I16()
{
} // ~Inst_VOP3__V_MIN_I16

// D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
void
Inst_VOP3__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
    VecOperandI16 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    if (instData.ABS & 0x1) {
        src0.absModifier();
    }

    if (instData.ABS & 0x2) {
        src1.absModifier();
    }

    if (extData.NEG & 0x1) {
        src0.negModifier();
    }

    if (extData.NEG & 0x2) {
        src1.negModifier();
    }

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = std::min(src0[lane], src1[lane]);
        }
    }

    vdst.write();
}

Inst_VOP3__V_LDEXP_F16::Inst_VOP3__V_LDEXP_F16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_ldexp_f16", false)
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP3__V_LDEXP_F16

Inst_VOP3__V_LDEXP_F16::~Inst_VOP3__V_LDEXP_F16()
{
} // ~Inst_VOP3__V_LDEXP_F16

// D.f16 = S0.f16 * (2 ** S1.i16).
void
Inst_VOP3__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
{
    // F16 arithmetic is not implemented in this ISA model.
    panicUnimplemented();
}

Inst_VOP3__V_NOP::Inst_VOP3__V_NOP(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_nop", false)
{
    setFlag(Nop);
    setFlag(ALU);
} // Inst_VOP3__V_NOP

Inst_VOP3__V_NOP::~Inst_VOP3__V_NOP()
{
} // ~Inst_VOP3__V_NOP

// Do nothing.
void
Inst_VOP3__V_NOP::execute(GPUDynInstPtr gpuDynInst)
{
}

Inst_VOP3__V_MOV_B32::Inst_VOP3__V_MOV_B32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_mov_b32", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_MOV_B32

Inst_VOP3__V_MOV_B32::~Inst_VOP3__V_MOV_B32()
{
} // ~Inst_VOP3__V_MOV_B32

// D.u = S0.u.
// Input and output modifiers not supported; this is an untyped operation.
void
Inst_VOP3__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    // Straight per-lane copy for active lanes.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = src[lane];
        }
    }

    vdst.write();
}

Inst_VOP3__V_CVT_I32_F64::Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_i32_f64", false)
{
    setFlag(ALU);
    setFlag(F64);
} // Inst_VOP3__V_CVT_I32_F64

Inst_VOP3__V_CVT_I32_F64::~Inst_VOP3__V_CVT_I32_F64()
{
} // ~Inst_VOP3__V_CVT_I32_F64

// D.i = (int)S0.d.
// Out-of-range floating point values (including infinity) saturate. NaN
// is converted to 0.
void
Inst_VOP3__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
    VecOperandI32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    if (instData.ABS & 0x1) {
        src.absModifier();
    }

    if (extData.NEG & 0x1) {
        src.negModifier();
    }

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            int exp;
            // frexp yields the binary exponent; exp > 30 means the
            // magnitude is >= 2^30 * 2 and cannot fit in a signed int,
            // so the result saturates to INT_MIN/INT_MAX by sign.
            std::frexp(src[lane],&exp);
            if (std::isnan(src[lane])) {
                vdst[lane] = 0;
            } else if (std::isinf(src[lane]) || exp > 30) {
                if (std::signbit(src[lane])) {
                    vdst[lane] = INT_MIN;
                } else {
                    vdst[lane] = INT_MAX;
                }
            } else {
                vdst[lane] = (VecElemI32)src[lane];
            }
        }
    }

    vdst.write();
}

Inst_VOP3__V_CVT_F64_I32::Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_f64_i32", false)
{
    setFlag(ALU);
    setFlag(F64);
} // Inst_VOP3__V_CVT_F64_I32

Inst_VOP3__V_CVT_F64_I32::~Inst_VOP3__V_CVT_F64_I32()
{
} // ~Inst_VOP3__V_CVT_F64_I32

// D.d = (double)S0.i.
void
Inst_VOP3__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
    VecOperandF64 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    // NOTE(review): ABS/NEG modifiers applied to an integer source;
    // siblings assert these clear — confirm intended.
    if (instData.ABS & 0x1) {
        src.absModifier();
    }

    if (extData.NEG & 0x1) {
        src.negModifier();
    }

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = (VecElemF64)src[lane];
        }
    }

    vdst.write();
}

Inst_VOP3__V_CVT_F32_I32::Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_f32_i32", false)
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP3__V_CVT_F32_I32

Inst_VOP3__V_CVT_F32_I32::~Inst_VOP3__V_CVT_F32_I32()
{
} // ~Inst_VOP3__V_CVT_F32_I32

// D.f = (float)S0.i.
- void - Inst_VOP3__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - VecOperandI32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP3__V_CVT_F32_U32::Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cvt_f32_u32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_F32_U32 - - Inst_VOP3__V_CVT_F32_U32::~Inst_VOP3__V_CVT_F32_U32() - { - } // ~Inst_VOP3__V_CVT_F32_U32 - - // D.f = (float)S0.u. - void - Inst_VOP3__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP3__V_CVT_U32_F32::Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cvt_u32_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_U32_F32 - - Inst_VOP3__V_CVT_U32_F32::~Inst_VOP3__V_CVT_U32_F32() - { - } // ~Inst_VOP3__V_CVT_U32_F32 - - // D.u = (unsigned)S0.f. - // Out-of-range floating point values (including infinity) saturate. NaN - // is converted to 0. 
void
Inst_VOP3__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    if (instData.ABS & 0x1) {
        src.absModifier();
    }

    if (extData.NEG & 0x1) {
        src.negModifier();
    }

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            int exp;
            // Use the binary exponent to detect values too large for a
            // 32-bit unsigned result (exp > 31 => magnitude >= 2^32).
            std::frexp(src[lane],&exp);
            if (std::isnan(src[lane])) {
                vdst[lane] = 0;
            } else if (std::isinf(src[lane])) {
                if (std::signbit(src[lane])) {
                    vdst[lane] = 0;
                } else {
                    vdst[lane] = UINT_MAX;
                }
            } else if (exp > 31) {
                vdst[lane] = UINT_MAX;
            } else {
                vdst[lane] = (VecElemU32)src[lane];
            }
        }
    }

    vdst.write();
}

Inst_VOP3__V_CVT_I32_F32::Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_i32_f32", false)
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP3__V_CVT_I32_F32

Inst_VOP3__V_CVT_I32_F32::~Inst_VOP3__V_CVT_I32_F32()
{
} // ~Inst_VOP3__V_CVT_I32_F32

// D.i = (int)S0.f.
// Out-of-range floating point values (including infinity) saturate. NaN
// is converted to 0.
void
Inst_VOP3__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
    VecOperandI32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    if (instData.ABS & 0x1) {
        src.absModifier();
    }

    if (extData.NEG & 0x1) {
        src.negModifier();
    }

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            int exp;
            // exp > 30 => magnitude >= 2^31, which overflows a signed
            // int; saturate to INT_MIN/INT_MAX by sign.
            std::frexp(src[lane],&exp);
            if (std::isnan(src[lane])) {
                vdst[lane] = 0;
            } else if (std::isinf(src[lane]) || exp > 30) {
                if (std::signbit(src[lane])) {
                    vdst[lane] = INT_MIN;
                } else {
                    vdst[lane] = INT_MAX;
                }
            } else {
                vdst[lane] = (VecElemI32)src[lane];
            }
        }
    }

    vdst.write();
}

Inst_VOP3__V_MOV_FED_B32::Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_mov_fed_b32", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_MOV_FED_B32

Inst_VOP3__V_MOV_FED_B32::~Inst_VOP3__V_MOV_FED_B32()
{
} // ~Inst_VOP3__V_MOV_FED_B32

// D.u = S0.u;
// Input and output modifiers not supported; this is an untyped operation.
void
Inst_VOP3__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
{
    // Not implemented in this ISA model.
    panicUnimplemented();
}

Inst_VOP3__V_CVT_F16_F32::Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_f16_f32", false)
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP3__V_CVT_F16_F32

Inst_VOP3__V_CVT_F16_F32::~Inst_VOP3__V_CVT_F16_F32()
{
} // ~Inst_VOP3__V_CVT_F16_F32

// D.f16 = flt32_to_flt16(S0.f).
void
Inst_VOP3__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
{
    // F16 conversion is not implemented in this ISA model.
    panicUnimplemented();
}

Inst_VOP3__V_CVT_F32_F16::Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_f32_f16", false)
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP3__V_CVT_F32_F16

Inst_VOP3__V_CVT_F32_F16::~Inst_VOP3__V_CVT_F32_F16()
{
} // ~Inst_VOP3__V_CVT_F32_F16

// D.f = flt16_to_flt32(S0.f16).
void
Inst_VOP3__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
{
    // F16 conversion is not implemented in this ISA model.
    panicUnimplemented();
}

Inst_VOP3__V_CVT_RPI_I32_F32::Inst_VOP3__V_CVT_RPI_I32_F32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_rpi_i32_f32", false)
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP3__V_CVT_RPI_I32_F32

Inst_VOP3__V_CVT_RPI_I32_F32::~Inst_VOP3__V_CVT_RPI_I32_F32()
{
} // ~Inst_VOP3__V_CVT_RPI_I32_F32

// D.i = (int)floor(S0.f + 0.5).
void
Inst_VOP3__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
    VecOperandI32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    if (instData.ABS & 0x1) {
        src.absModifier();
    }

    if (extData.NEG & 0x1) {
        src.negModifier();
    }

    // Round-to-plus-infinity after adding 0.5 (round-half-up).
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
        }
    }

    vdst.write();
}

Inst_VOP3__V_CVT_FLR_I32_F32::Inst_VOP3__V_CVT_FLR_I32_F32(
    InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_flr_i32_f32", false)
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP3__V_CVT_FLR_I32_F32

Inst_VOP3__V_CVT_FLR_I32_F32::~Inst_VOP3__V_CVT_FLR_I32_F32()
{
} // ~Inst_VOP3__V_CVT_FLR_I32_F32

// D.i = (int)floor(S0.f).
void
Inst_VOP3__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
    VecOperandI32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    if (instData.ABS & 0x1) {
        src.absModifier();
    }

    if (extData.NEG & 0x1) {
        src.negModifier();
    }

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = (VecElemI32)std::floor(src[lane]);
        }
    }

    vdst.write();
}

Inst_VOP3__V_CVT_OFF_F32_I4::Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_off_f32_i4", false)
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP3__V_CVT_OFF_F32_I4

Inst_VOP3__V_CVT_OFF_F32_I4::~Inst_VOP3__V_CVT_OFF_F32_I4()
{
} // ~Inst_VOP3__V_CVT_OFF_F32_I4

// 4-bit signed int to 32-bit float.
void
Inst_VOP3__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
{
    // Not implemented in this ISA model.
    panicUnimplemented();
}

Inst_VOP3__V_CVT_F32_F64::Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_f32_f64", false)
{
    setFlag(ALU);
    setFlag(F64);
} // Inst_VOP3__V_CVT_F32_F64

Inst_VOP3__V_CVT_F32_F64::~Inst_VOP3__V_CVT_F32_F64()
{
} // ~Inst_VOP3__V_CVT_F32_F64

// D.f = (float)S0.d.
void
Inst_VOP3__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
    VecOperandF32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    if (instData.ABS & 0x1) {
        src.absModifier();
    }

    if (extData.NEG & 0x1) {
        src.negModifier();
    }

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    // Narrowing double-to-float conversion (rounds per the host FPU).
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = (VecElemF32)src[lane];
        }
    }

    vdst.write();
}

Inst_VOP3__V_CVT_F64_F32::Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_f64_f32", false)
{
    setFlag(ALU);
    setFlag(F64);
} // Inst_VOP3__V_CVT_F64_F32

Inst_VOP3__V_CVT_F64_F32::~Inst_VOP3__V_CVT_F64_F32()
{
} // ~Inst_VOP3__V_CVT_F64_F32

// D.d = (double)S0.f.
- void - Inst_VOP3__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF64)src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP3__V_CVT_F32_UBYTE0::Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cvt_f32_ubyte0", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_F32_UBYTE0 - - Inst_VOP3__V_CVT_F32_UBYTE0::~Inst_VOP3__V_CVT_F32_UBYTE0() - { - } // ~Inst_VOP3__V_CVT_F32_UBYTE0 - - // D.f = (float)(S0.u[7:0]). - void - Inst_VOP3__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)bits(src[lane], 7, 0); - } - } - - vdst.write(); - } - - Inst_VOP3__V_CVT_F32_UBYTE1::Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cvt_f32_ubyte1", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_F32_UBYTE1 - - Inst_VOP3__V_CVT_F32_UBYTE1::~Inst_VOP3__V_CVT_F32_UBYTE1() - { - } // ~Inst_VOP3__V_CVT_F32_UBYTE1 - - // D.f = (float)(S0.u[15:8]). 
- void - Inst_VOP3__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)bits(src[lane], 15, 8); - } - } - - vdst.write(); - } - - Inst_VOP3__V_CVT_F32_UBYTE2::Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cvt_f32_ubyte2", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_F32_UBYTE2 - - Inst_VOP3__V_CVT_F32_UBYTE2::~Inst_VOP3__V_CVT_F32_UBYTE2() - { - } // ~Inst_VOP3__V_CVT_F32_UBYTE2 - - // D.f = (float)(S0.u[23:16]). - void - Inst_VOP3__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)bits(src[lane], 23, 16); - } - } - - vdst.write(); - } - - Inst_VOP3__V_CVT_F32_UBYTE3::Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cvt_f32_ubyte3", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_F32_UBYTE3 - - Inst_VOP3__V_CVT_F32_UBYTE3::~Inst_VOP3__V_CVT_F32_UBYTE3() - { - } // ~Inst_VOP3__V_CVT_F32_UBYTE3 - - // D.f = (float)(S0.u[31:24]). 
- void - Inst_VOP3__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)bits(src[lane], 31, 24); - } - } - - vdst.write(); - } - - Inst_VOP3__V_CVT_U32_F64::Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cvt_u32_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CVT_U32_F64 - - Inst_VOP3__V_CVT_U32_F64::~Inst_VOP3__V_CVT_U32_F64() - { - } // ~Inst_VOP3__V_CVT_U32_F64 - - // D.u = (unsigned)S0.d. - // Out-of-range floating point values (including infinity) saturate. NaN - // is converted to 0. - void - Inst_VOP3__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp; - std::frexp(src[lane],&exp); - if (std::isnan(src[lane])) { - vdst[lane] = 0; - } else if (std::isinf(src[lane])) { - if (std::signbit(src[lane])) { - vdst[lane] = 0; - } else { - vdst[lane] = UINT_MAX; - } - } else if (exp > 31) { - vdst[lane] = UINT_MAX; - } else { - vdst[lane] = (VecElemU32)src[lane]; - } - } - } - - vdst.write(); - } - - Inst_VOP3__V_CVT_F64_U32::Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cvt_f64_u32", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CVT_F64_U32 - - Inst_VOP3__V_CVT_F64_U32::~Inst_VOP3__V_CVT_F64_U32() - { - } // ~Inst_VOP3__V_CVT_F64_U32 - - // 
D.d = (double)S0.u. - void - Inst_VOP3__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF64)src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP3__V_TRUNC_F64::Inst_VOP3__V_TRUNC_F64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_trunc_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_TRUNC_F64 - - Inst_VOP3__V_TRUNC_F64::~Inst_VOP3__V_TRUNC_F64() - { - } // ~Inst_VOP3__V_TRUNC_F64 - - // D.d = trunc(S0.d), return integer part of S0.d. - void - Inst_VOP3__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::trunc(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_CEIL_F64::Inst_VOP3__V_CEIL_F64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_ceil_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CEIL_F64 - - Inst_VOP3__V_CEIL_F64::~Inst_VOP3__V_CEIL_F64() - { - } // ~Inst_VOP3__V_CEIL_F64 - - // D.d = ceil(S0.d); - void - Inst_VOP3__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < 
NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::ceil(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_RNDNE_F64::Inst_VOP3__V_RNDNE_F64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_rndne_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_RNDNE_F64 - - Inst_VOP3__V_RNDNE_F64::~Inst_VOP3__V_RNDNE_F64() - { - } // ~Inst_VOP3__V_RNDNE_F64 - - // D.d = round_nearest_even(S0.d). - void - Inst_VOP3__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = roundNearestEven(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_FLOOR_F64::Inst_VOP3__V_FLOOR_F64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_floor_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_FLOOR_F64 - - Inst_VOP3__V_FLOOR_F64::~Inst_VOP3__V_FLOOR_F64() - { - } // ~Inst_VOP3__V_FLOOR_F64 - - // D.d = floor(S0.d); - void - Inst_VOP3__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::floor(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_FRACT_F32::Inst_VOP3__V_FRACT_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_fract_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_FRACT_F32 - - Inst_VOP3__V_FRACT_F32::~Inst_VOP3__V_FRACT_F32() - { - } // ~Inst_VOP3__V_FRACT_F32 - - 
// D.f = modf(S0.f). - void - Inst_VOP3__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF32 int_part(0.0); - vdst[lane] = std::modf(src[lane], &int_part); - } - } - - vdst.write(); - } - - Inst_VOP3__V_TRUNC_F32::Inst_VOP3__V_TRUNC_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_trunc_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_TRUNC_F32 - - Inst_VOP3__V_TRUNC_F32::~Inst_VOP3__V_TRUNC_F32() - { - } // ~Inst_VOP3__V_TRUNC_F32 - - // D.f = trunc(S0.f), return integer part of S0.f. - void - Inst_VOP3__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::trunc(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_CEIL_F32::Inst_VOP3__V_CEIL_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_ceil_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CEIL_F32 - - Inst_VOP3__V_CEIL_F32::~Inst_VOP3__V_CEIL_F32() - { - } // ~Inst_VOP3__V_CEIL_F32 - - // D.f = ceil(S0.f); - void - Inst_VOP3__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - 
src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::ceil(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_RNDNE_F32::Inst_VOP3__V_RNDNE_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_rndne_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_RNDNE_F32 - - Inst_VOP3__V_RNDNE_F32::~Inst_VOP3__V_RNDNE_F32() - { - } // ~Inst_VOP3__V_RNDNE_F32 - - // D.f = round_nearest_even(S0.f). - void - Inst_VOP3__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = roundNearestEven(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_FLOOR_F32::Inst_VOP3__V_FLOOR_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_floor_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_FLOOR_F32 - - Inst_VOP3__V_FLOOR_F32::~Inst_VOP3__V_FLOOR_F32() - { - } // ~Inst_VOP3__V_FLOOR_F32 - - // D.f = floor(S0.f); - void - Inst_VOP3__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::floor(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_EXP_F32::Inst_VOP3__V_EXP_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_exp_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_EXP_F32 - - 
Inst_VOP3__V_EXP_F32::~Inst_VOP3__V_EXP_F32() - { - } // ~Inst_VOP3__V_EXP_F32 - - // D.f = pow(2.0, S0.f). - void - Inst_VOP3__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::pow(2.0, src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_LOG_F32::Inst_VOP3__V_LOG_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_log_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_LOG_F32 - - Inst_VOP3__V_LOG_F32::~Inst_VOP3__V_LOG_F32() - { - } // ~Inst_VOP3__V_LOG_F32 - - // D.f = log2(S0.f). - void - Inst_VOP3__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::log2(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_RCP_F32::Inst_VOP3__V_RCP_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_rcp_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_RCP_F32 - - Inst_VOP3__V_RCP_F32::~Inst_VOP3__V_RCP_F32() - { - } // ~Inst_VOP3__V_RCP_F32 - - // D.f = 1.0 / S0.f. 
- void - Inst_VOP3__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = 1.0 / src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP3__V_RCP_IFLAG_F32::Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_rcp_iflag_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_RCP_IFLAG_F32 - - Inst_VOP3__V_RCP_IFLAG_F32::~Inst_VOP3__V_RCP_IFLAG_F32() - { - } // ~Inst_VOP3__V_RCP_IFLAG_F32 - - // D.f = 1.0 / S0.f. - void - Inst_VOP3__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = 1.0 / src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP3__V_RSQ_F32::Inst_VOP3__V_RSQ_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_rsq_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_RSQ_F32 - - Inst_VOP3__V_RSQ_F32::~Inst_VOP3__V_RSQ_F32() - { - } // ~Inst_VOP3__V_RSQ_F32 - - // D.f = 1.0 / sqrt(S0.f). 
- void - Inst_VOP3__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = 1.0 / std::sqrt(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_RCP_F64::Inst_VOP3__V_RCP_F64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_rcp_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_RCP_F64 - - Inst_VOP3__V_RCP_F64::~Inst_VOP3__V_RCP_F64() - { - } // ~Inst_VOP3__V_RCP_F64 - - // D.d = 1.0 / S0.d. - void - Inst_VOP3__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::fpclassify(src[lane]) == FP_ZERO) { - vdst[lane] = +INFINITY; - } else if (std::isnan(src[lane])) { - vdst[lane] = NAN; - } else if (std::isinf(src[lane])) { - if (std::signbit(src[lane])) { - vdst[lane] = -0.0; - } else { - vdst[lane] = 0.0; - } - } else { - vdst[lane] = 1.0 / src[lane]; - } - } - } - - vdst.write(); - } - - Inst_VOP3__V_RSQ_F64::Inst_VOP3__V_RSQ_F64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_rsq_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_RSQ_F64 - - Inst_VOP3__V_RSQ_F64::~Inst_VOP3__V_RSQ_F64() - { - } // ~Inst_VOP3__V_RSQ_F64 - - // D.d = 1.0 / sqrt(S0.d). 
- void - Inst_VOP3__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::fpclassify(src[lane]) == FP_ZERO) { - vdst[lane] = +INFINITY; - } else if (std::isnan(src[lane])) { - vdst[lane] = NAN; - } else if (std::isinf(src[lane]) && !std::signbit(src[lane])) { - vdst[lane] = 0.0; - } else if (std::signbit(src[lane])) { - vdst[lane] = NAN; - } else { - vdst[lane] = 1.0 / std::sqrt(src[lane]); - } - } - } - - vdst.write(); - } - - Inst_VOP3__V_SQRT_F32::Inst_VOP3__V_SQRT_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_sqrt_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_SQRT_F32 - - Inst_VOP3__V_SQRT_F32::~Inst_VOP3__V_SQRT_F32() - { - } // ~Inst_VOP3__V_SQRT_F32 - - // D.f = sqrt(S0.f). - void - Inst_VOP3__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::sqrt(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_SQRT_F64::Inst_VOP3__V_SQRT_F64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_sqrt_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_SQRT_F64 - - Inst_VOP3__V_SQRT_F64::~Inst_VOP3__V_SQRT_F64() - { - } // ~Inst_VOP3__V_SQRT_F64 - - // D.d = sqrt(S0.d). 
- void - Inst_VOP3__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::sqrt(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_SIN_F32::Inst_VOP3__V_SIN_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_sin_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_SIN_F32 - - Inst_VOP3__V_SIN_F32::~Inst_VOP3__V_SIN_F32() - { - } // ~Inst_VOP3__V_SIN_F32 - - // D.f = sin(S0.f * 2 * PI). - void - Inst_VOP3__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - ConstScalarOperandF32 pi(gpuDynInst, REG_PI); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - pi.read(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::sin(src[lane] * 2 * pi.rawData()); - } - } - - vdst.write(); - } - - Inst_VOP3__V_COS_F32::Inst_VOP3__V_COS_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cos_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_COS_F32 - - Inst_VOP3__V_COS_F32::~Inst_VOP3__V_COS_F32() - { - } // ~Inst_VOP3__V_COS_F32 - - // D.f = cos(S0.f * 2 * PI). 
- void - Inst_VOP3__V_COS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - ConstScalarOperandF32 pi(gpuDynInst, REG_PI); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - pi.read(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::cos(src[lane] * 2 * pi.rawData()); - } - } - - vdst.write(); - } - - Inst_VOP3__V_NOT_B32::Inst_VOP3__V_NOT_B32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_not_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_NOT_B32 - - Inst_VOP3__V_NOT_B32::~Inst_VOP3__V_NOT_B32() - { - } // ~Inst_VOP3__V_NOT_B32 - - // D.u = ~S0.u. - // Input and output modifiers not supported. - void - Inst_VOP3__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = ~src[lane]; - } - } - - vdst.write(); - } - - Inst_VOP3__V_BFREV_B32::Inst_VOP3__V_BFREV_B32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_bfrev_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_BFREV_B32 - - Inst_VOP3__V_BFREV_B32::~Inst_VOP3__V_BFREV_B32() - { - } // ~Inst_VOP3__V_BFREV_B32 - - // D.u[31:0] = S0.u[0:31], bitfield reverse. - // Input and output modifiers not supported. 
- void - Inst_VOP3__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = reverseBits(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_FFBH_U32::Inst_VOP3__V_FFBH_U32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_ffbh_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_FFBH_U32 - - Inst_VOP3__V_FFBH_U32::~Inst_VOP3__V_FFBH_U32() - { - } // ~Inst_VOP3__V_FFBH_U32 - - // D.u = position of first 1 in S0.u from MSB; - // D.u = 0xffffffff if S0.u == 0. - void - Inst_VOP3__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = findFirstOneMsb(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_FFBL_B32::Inst_VOP3__V_FFBL_B32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_ffbl_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_FFBL_B32 - - Inst_VOP3__V_FFBL_B32::~Inst_VOP3__V_FFBL_B32() - { - } // ~Inst_VOP3__V_FFBL_B32 - - // D.u = position of first 1 in S0.u from LSB; - // D.u = 0xffffffff if S0.u == 0. 
- void - Inst_VOP3__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = findFirstOne(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_FFBH_I32::Inst_VOP3__V_FFBH_I32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_ffbh_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_FFBH_I32 - - Inst_VOP3__V_FFBH_I32::~Inst_VOP3__V_FFBH_I32() - { - } // ~Inst_VOP3__V_FFBH_I32 - - // D.u = position of first bit different from sign bit in S0.i from MSB; - // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff. - void - Inst_VOP3__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = firstOppositeSignBit(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_FREXP_EXP_I32_F64::Inst_VOP3__V_FREXP_EXP_I32_F64( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_frexp_exp_i32_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_FREXP_EXP_I32_F64 - - Inst_VOP3__V_FREXP_EXP_I32_F64::~Inst_VOP3__V_FREXP_EXP_I32_F64() - { - } // ~Inst_VOP3__V_FREXP_EXP_I32_F64 - - // See V_FREXP_EXP_I32_F32. 
- void - Inst_VOP3__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isinf(src[lane]) || std::isnan(src[lane])) { - vdst[lane] = 0; - } else { - VecElemI32 exp(0); - std::frexp(src[lane], &exp); - vdst[lane] = exp; - } - } - } - - vdst.write(); - } - - Inst_VOP3__V_FREXP_MANT_F64::Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_frexp_mant_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_FREXP_MANT_F64 - - Inst_VOP3__V_FREXP_MANT_F64::~Inst_VOP3__V_FREXP_MANT_F64() - { - } // ~Inst_VOP3__V_FREXP_MANT_F64 - - void - Inst_VOP3__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI32 exp(0); - vdst[lane] = std::frexp(src[lane], &exp); - } - } - - vdst.write(); - } - - Inst_VOP3__V_FRACT_F64::Inst_VOP3__V_FRACT_F64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_fract_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_FRACT_F64 - - Inst_VOP3__V_FRACT_F64::~Inst_VOP3__V_FRACT_F64() - { - } // ~Inst_VOP3__V_FRACT_F64 - - void - Inst_VOP3__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - 
src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF32 int_part(0.0); - vdst[lane] = std::modf(src[lane], &int_part); - } - } - - vdst.write(); - } - - Inst_VOP3__V_FREXP_EXP_I32_F32::Inst_VOP3__V_FREXP_EXP_I32_F32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_frexp_exp_i32_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_FREXP_EXP_I32_F32 - - Inst_VOP3__V_FREXP_EXP_I32_F32::~Inst_VOP3__V_FREXP_EXP_I32_F32() - { - } // ~Inst_VOP3__V_FREXP_EXP_I32_F32 - - // frexp(S0.f, Exponenti(S0.f)) - // if (S0.f == INF || S0.f == NAN) then D.i = 0; - // else D.i = Exponent(S0.f) - void - Inst_VOP3__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isinf(src[lane])|| std::isnan(src[lane])) { - vdst[lane] = 0; - } else { - VecElemI32 exp(0); - std::frexp(src[lane], &exp); - vdst[lane] = exp; - } - } - } - - vdst.write(); - } - - Inst_VOP3__V_FREXP_MANT_F32::Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_frexp_mant_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_FREXP_MANT_F32 - - Inst_VOP3__V_FREXP_MANT_F32::~Inst_VOP3__V_FREXP_MANT_F32() - { - } // ~Inst_VOP3__V_FREXP_MANT_F32 - - // if (S0.f == INF || S0.f == NAN) then D.f = S0.f; - // else D.f = Mantissa(S0.f). 
    void
    Inst_VOP3__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // VOP3 input modifiers: bit 0 of ABS/NEG applies to SRC0.
        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // INF/NAN pass through unchanged; otherwise the mantissa
                // returned by std::frexp is kept and the exponent dropped.
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = src[lane];
                } else {
                    VecElemI32 exp(0);
                    vdst[lane] = std::frexp(src[lane], &exp);
                }
            }
        }

        vdst.write();
    }

    // Clear wave exception state. Note: no ALU flag is set here, matching
    // the original code — NOTE(review): confirm this omission is intended.
    Inst_VOP3__V_CLREXCP::Inst_VOP3__V_CLREXCP(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_clrexcp", false)
    {
    } // Inst_VOP3__V_CLREXCP

    Inst_VOP3__V_CLREXCP::~Inst_VOP3__V_CLREXCP()
    {
    } // ~Inst_VOP3__V_CLREXCP

    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CVT_F16_U16::Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f16_u16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_F16_U16

    Inst_VOP3__V_CVT_F16_U16::~Inst_VOP3__V_CVT_F16_U16()
    {
    } // ~Inst_VOP3__V_CVT_F16_U16

    // D.f16 = uint16_to_flt16(S.u16).
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CVT_F16_I16::Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f16_i16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_F16_I16

    Inst_VOP3__V_CVT_F16_I16::~Inst_VOP3__V_CVT_F16_I16()
    {
    } // ~Inst_VOP3__V_CVT_F16_I16

    // D.f16 = int16_to_flt16(S.i16).
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CVT_U16_F16::Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_u16_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_U16_F16

    Inst_VOP3__V_CVT_U16_F16::~Inst_VOP3__V_CVT_U16_F16()
    {
    } // ~Inst_VOP3__V_CVT_U16_F16

    // D.u16 = flt16_to_uint16(S.f16).
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CVT_I16_F16::Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_i16_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_I16_F16

    Inst_VOP3__V_CVT_I16_F16::~Inst_VOP3__V_CVT_I16_F16()
    {
    } // ~Inst_VOP3__V_CVT_I16_F16

    // D.i16 = flt16_to_int16(S.f16).
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_RCP_F16::Inst_VOP3__V_RCP_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rcp_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_RCP_F16

    Inst_VOP3__V_RCP_F16::~Inst_VOP3__V_RCP_F16()
    {
    } // ~Inst_VOP3__V_RCP_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = 1 / S0.f16.
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_SQRT_F16::Inst_VOP3__V_SQRT_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sqrt_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_SQRT_F16

    Inst_VOP3__V_SQRT_F16::~Inst_VOP3__V_SQRT_F16()
    {
    } // ~Inst_VOP3__V_SQRT_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = sqrt(S0.f16).
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_RSQ_F16::Inst_VOP3__V_RSQ_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rsq_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_RSQ_F16

    Inst_VOP3__V_RSQ_F16::~Inst_VOP3__V_RSQ_F16()
    {
    } // ~Inst_VOP3__V_RSQ_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = 1 / sqrt(S0.f16).
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_LOG_F16::Inst_VOP3__V_LOG_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_log_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_LOG_F16

    Inst_VOP3__V_LOG_F16::~Inst_VOP3__V_LOG_F16()
    {
    } // ~Inst_VOP3__V_LOG_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 0.0f;
    // else
    //     D.f16 = log2(S0.f16).
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_EXP_F16::Inst_VOP3__V_EXP_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_exp_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_EXP_F16

    Inst_VOP3__V_EXP_F16::~Inst_VOP3__V_EXP_F16()
    {
    } // ~Inst_VOP3__V_EXP_F16

    // if (S0.f16 == 0.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = pow(2.0, S0.f16).
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_FREXP_MANT_F16::Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_mant_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FREXP_MANT_F16

    Inst_VOP3__V_FREXP_MANT_F16::~Inst_VOP3__V_FREXP_MANT_F16()
    {
    } // ~Inst_VOP3__V_FREXP_MANT_F16

    // if (S0.f16 == +-INF || S0.f16 == NAN)
    //     D.f16 = S0.f16;
    // else
    //     D.f16 = mantissa(S0.f16).
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_FREXP_EXP_I16_F16::Inst_VOP3__V_FREXP_EXP_I16_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_exp_i16_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FREXP_EXP_I16_F16

    Inst_VOP3__V_FREXP_EXP_I16_F16::~Inst_VOP3__V_FREXP_EXP_I16_F16()
    {
    } // ~Inst_VOP3__V_FREXP_EXP_I16_F16

    // Presumably D.i16 = Exponent(S0.f16), by analogy with the I32/F32
    // variant — not implemented here; executing it panics the simulation.
    void
    Inst_VOP3__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_FLOOR_F16::Inst_VOP3__V_FLOOR_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_floor_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FLOOR_F16

    Inst_VOP3__V_FLOOR_F16::~Inst_VOP3__V_FLOOR_F16()
    {
    } // ~Inst_VOP3__V_FLOOR_F16

    // D.f16 = floor(S0.f16);
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CEIL_F16::Inst_VOP3__V_CEIL_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ceil_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CEIL_F16

    Inst_VOP3__V_CEIL_F16::~Inst_VOP3__V_CEIL_F16()
    {
    } // ~Inst_VOP3__V_CEIL_F16

    // D.f16 = ceil(S0.f16);
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_TRUNC_F16::Inst_VOP3__V_TRUNC_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_trunc_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_TRUNC_F16

    Inst_VOP3__V_TRUNC_F16::~Inst_VOP3__V_TRUNC_F16()
    {
    } // ~Inst_VOP3__V_TRUNC_F16

    // D.f16 = trunc(S0.f16).
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_RNDNE_F16::Inst_VOP3__V_RNDNE_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rndne_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_RNDNE_F16

    Inst_VOP3__V_RNDNE_F16::~Inst_VOP3__V_RNDNE_F16()
    {
    } // ~Inst_VOP3__V_RNDNE_F16

    // D.f16 = roundNearestEven(S0.f16);
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_FRACT_F16::Inst_VOP3__V_FRACT_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fract_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FRACT_F16

    Inst_VOP3__V_FRACT_F16::~Inst_VOP3__V_FRACT_F16()
    {
    } // ~Inst_VOP3__V_FRACT_F16

    // D.f16 = S0.f16 + -floor(S0.f16).
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_SIN_F16::Inst_VOP3__V_SIN_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sin_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_SIN_F16

    Inst_VOP3__V_SIN_F16::~Inst_VOP3__V_SIN_F16()
    {
    } // ~Inst_VOP3__V_SIN_F16

    // D.f16 = sin(S0.f16 * 2 * PI).
    // Not implemented in this model; executing it panics the simulation.
    void
    Inst_VOP3__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_COS_F16::Inst_VOP3__V_COS_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cos_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_COS_F16

    Inst_VOP3__V_COS_F16::~Inst_VOP3__V_COS_F16()
    {
    } // ~Inst_VOP3__V_COS_F16

    // D.f16 = cos(S0.f16 * 2 * PI).
- void - Inst_VOP3__V_COS_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_VOP3__V_EXP_LEGACY_F32::Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_exp_legacy_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_EXP_LEGACY_F32 - - Inst_VOP3__V_EXP_LEGACY_F32::~Inst_VOP3__V_EXP_LEGACY_F32() - { - } // ~Inst_VOP3__V_EXP_LEGACY_F32 - - // D.f = pow(2.0, S0.f) - void - Inst_VOP3__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::pow(2.0, src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_LOG_LEGACY_F32::Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_log_legacy_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_LOG_LEGACY_F32 - - Inst_VOP3__V_LOG_LEGACY_F32::~Inst_VOP3__V_LOG_LEGACY_F32() - { - } // ~Inst_VOP3__V_LOG_LEGACY_F32 - - // D.f = log2(S0.f). 
- void - Inst_VOP3__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::log2(src[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_MAD_LEGACY_F32::Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_mad_legacy_f32", false) - { - setFlag(ALU); - setFlag(F32); - setFlag(MAD); - } // Inst_VOP3__V_MAD_LEGACY_F32 - - Inst_VOP3__V_MAD_LEGACY_F32::~Inst_VOP3__V_MAD_LEGACY_F32() - { - } // ~Inst_VOP3__V_MAD_LEGACY_F32 - - // D.f = S0.f * S1.f + S2.f - void - Inst_VOP3__V_MAD_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_MAD_F32::Inst_VOP3__V_MAD_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_mad_f32", false) - { - setFlag(ALU); - setFlag(F32); - setFlag(MAD); - } // Inst_VOP3__V_MAD_F32 - - Inst_VOP3__V_MAD_F32::~Inst_VOP3__V_MAD_F32() - { - } // ~Inst_VOP3__V_MAD_F32 - - // D.f = S0.f * S1.f + S2.f. 
    void
    Inst_VOP3__V_MAD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        // ABS/NEG bits 0/1/2 select SRC0/SRC1/SRC2 respectively.
        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Modeled as a fused multiply-add (single rounding).
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_MAD_I32_I24::Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_i32_i24", false)
    {
        setFlag(ALU);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_I32_I24

    Inst_VOP3__V_MAD_I32_I24::~Inst_VOP3__V_MAD_I32_I24()
    {
    } // ~Inst_VOP3__V_MAD_I32_I24

    // D.i = S0.i[23:0] * S1.i[23:0] + S2.i.
    void
    Inst_VOP3__V_MAD_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // szext<24> sign-extends the low 24 bits before the
                // multiply, per the signed i24 semantics.
                vdst[lane] = szext<24>(src0[lane])
                    * szext<24>(src1[lane]) + src2[lane];
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_MAD_U32_U24::Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_u32_u24", false)
    {
        setFlag(ALU);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_U32_U24

    Inst_VOP3__V_MAD_U32_U24::~Inst_VOP3__V_MAD_U32_U24()
    {
    } // ~Inst_VOP3__V_MAD_U32_U24

    // D.u = S0.u[23:0] * S1.u[23:0] + S2.u.
    void
    Inst_VOP3__V_MAD_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Zero-extended 24-bit multiply, unsigned u24 semantics.
                vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0)
                    + src2[lane];
            }
        }

        vdst.write();
    }

    // Cubemap face-ID op — not implemented in this model.
    Inst_VOP3__V_CUBEID_F32::Inst_VOP3__V_CUBEID_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubeid_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBEID_F32

    Inst_VOP3__V_CUBEID_F32::~Inst_VOP3__V_CUBEID_F32()
    {
    } // ~Inst_VOP3__V_CUBEID_F32

    void
    Inst_VOP3__V_CUBEID_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    // Cubemap S-coordinate op — not implemented in this model.
    Inst_VOP3__V_CUBESC_F32::Inst_VOP3__V_CUBESC_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubesc_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBESC_F32

    Inst_VOP3__V_CUBESC_F32::~Inst_VOP3__V_CUBESC_F32()
    {
    } // ~Inst_VOP3__V_CUBESC_F32

    void
    Inst_VOP3__V_CUBESC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    // Cubemap T-coordinate op — not implemented in this model.
    Inst_VOP3__V_CUBETC_F32::Inst_VOP3__V_CUBETC_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubetc_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBETC_F32

    Inst_VOP3__V_CUBETC_F32::~Inst_VOP3__V_CUBETC_F32()
    {
    } // ~Inst_VOP3__V_CUBETC_F32

    void
    Inst_VOP3__V_CUBETC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    // Cubemap major-axis op — not implemented in this model.
    Inst_VOP3__V_CUBEMA_F32::Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubema_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBEMA_F32

    Inst_VOP3__V_CUBEMA_F32::~Inst_VOP3__V_CUBEMA_F32()
    {
    } // ~Inst_VOP3__V_CUBEMA_F32

    void
    Inst_VOP3__V_CUBEMA_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_BFE_U32::Inst_VOP3__V_BFE_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfe_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFE_U32

    Inst_VOP3__V_BFE_U32::~Inst_VOP3__V_BFE_U32()
    {
    } // ~Inst_VOP3__V_BFE_U32

    // D.u = (S0.u >> S1.u[4:0]) & ((1 << S2.u[4:0]) - 1).
    // Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width.
    void
    Inst_VOP3__V_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Offset and width are both taken modulo 32 (low 5 bits).
                vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
                    & ((1 << bits(src2[lane], 4, 0)) - 1);
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_BFE_I32::Inst_VOP3__V_BFE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfe_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFE_I32

    Inst_VOP3__V_BFE_I32::~Inst_VOP3__V_BFE_I32()
    {
    } // ~Inst_VOP3__V_BFE_I32

    // D.i = (S0.i >> S1.u[4:0]) & ((1 << S2.u[4:0]) - 1).
    // Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width.
    void
    Inst_VOP3__V_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // NOTE(review): shift of a signed src0 is arithmetic here,
                // then masked; matches the pseudocode above.
                vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
                    & ((1 << bits(src2[lane], 4, 0)) - 1);
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_BFI_B32::Inst_VOP3__V_BFI_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfi_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFI_B32

    Inst_VOP3__V_BFI_B32::~Inst_VOP3__V_BFI_B32()
    {
    } // ~Inst_VOP3__V_BFI_B32

    // D.u = (S0.u & S1.u) | (~S0.u & S2.u); bitfield insert.
    void
    Inst_VOP3__V_BFI_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // S0 is the bitmask selecting between S1 and S2.
                vdst[lane] = (src0[lane] & src1[lane]) | (~src0[lane]
                    & src2[lane]);
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_FMA_F32::Inst_VOP3__V_FMA_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fma_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(FMA);
    } // Inst_VOP3__V_FMA_F32

    Inst_VOP3__V_FMA_F32::~Inst_VOP3__V_FMA_F32()
    {
    } // ~Inst_VOP3__V_FMA_F32

    // D.f = S0.f * S1.f + S2.f.
    void
    Inst_VOP3__V_FMA_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        // ABS/NEG bits 0/1/2 select SRC0/SRC1/SRC2 respectively.
        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_FMA_F64::Inst_VOP3__V_FMA_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fma_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
        setFlag(FMA);
    } // Inst_VOP3__V_FMA_F64

    Inst_VOP3__V_FMA_F64::~Inst_VOP3__V_FMA_F64()
    {
    } // ~Inst_VOP3__V_FMA_F64

    // D.d = S0.d * S1.d + S2.d.
    void
    Inst_VOP3__V_FMA_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        // ABS/NEG bits 0/1/2 select SRC0/SRC1/SRC2 respectively.
        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_LERP_U8::Inst_VOP3__V_LERP_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lerp_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LERP_U8

    Inst_VOP3__V_LERP_U8::~Inst_VOP3__V_LERP_U8()
    {
    } // ~Inst_VOP3__V_LERP_U8

    // D.u = ((S0.u[31:24] + S1.u[31:24] + S2.u[24]) >> 1) << 24
    // D.u += ((S0.u[23:16] + S1.u[23:16] + S2.u[16]) >> 1) << 16;
    // D.u += ((S0.u[15:8] + S1.u[15:8] + S2.u[8]) >> 1) << 8;
    // D.u += ((S0.u[7:0] + S1.u[7:0] + S2.u[0]) >> 1).
    void
    Inst_VOP3__V_LERP_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Per-byte average of S0 and S1; the low bit of each
                // corresponding S2 byte supplies the rounding carry.
                vdst[lane] = ((bits(src0[lane], 31, 24)
                    + bits(src1[lane], 31, 24) + bits(src2[lane], 24)) >> 1)
                        << 24;
                vdst[lane] += ((bits(src0[lane], 23, 16)
                    + bits(src1[lane], 23, 16) + bits(src2[lane], 16)) >> 1)
                        << 16;
                vdst[lane] += ((bits(src0[lane], 15, 8)
                    + bits(src1[lane], 15, 8) + bits(src2[lane], 8)) >> 1)
                        << 8;
                vdst[lane] += ((bits(src0[lane], 7, 0) + bits(src1[lane], 7, 0)
                    + bits(src2[lane], 0)) >> 1);
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_ALIGNBIT_B32::Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_alignbit_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ALIGNBIT_B32

    Inst_VOP3__V_ALIGNBIT_B32::~Inst_VOP3__V_ALIGNBIT_B32()
    {
    } // ~Inst_VOP3__V_ALIGNBIT_B32

    // D.u = ({S0, S1} >> S2.u[4:0]) & 0xffffffff.
    void
    Inst_VOP3__V_ALIGNBIT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Concatenate S0 (high) and S1 (low), shift right by
                // S2[4:0] bits, keep the low 32 bits.
                VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
                    | (VecElemU64)src1[lane]);
                vdst[lane] = (VecElemU32)((src_0_1
                    >> (VecElemU64)bits(src2[lane], 4, 0)) & 0xffffffff);
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_ALIGNBYTE_B32::Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_alignbyte_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ALIGNBYTE_B32

    Inst_VOP3__V_ALIGNBYTE_B32::~Inst_VOP3__V_ALIGNBYTE_B32()
    {
    } // ~Inst_VOP3__V_ALIGNBYTE_B32

    // D.u = ({S0, S1} >> (8 * S2.u[4:0])) & 0xffffffff.
    void
    Inst_VOP3__V_ALIGNBYTE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Same as v_alignbit_b32 but the shift count is in bytes.
                VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
                    | (VecElemU64)src1[lane]);
                vdst[lane] = (VecElemU32)((src_0_1
                    >> (8ULL * (VecElemU64)bits(src2[lane], 4, 0)))
                        & 0xffffffff);
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_MIN3_F32::Inst_VOP3__V_MIN3_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min3_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MIN3_F32

    Inst_VOP3__V_MIN3_F32::~Inst_VOP3__V_MIN3_F32()
    {
    } // ~Inst_VOP3__V_MIN3_F32

    // D.f = min(S0.f, S1.f, S2.f).
    void
    Inst_VOP3__V_MIN3_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        // ABS/NEG bits 0/1/2 select SRC0/SRC1/SRC2 respectively.
        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // std::fmin prefers the numeric operand when one is NaN.
                VecElemF32 min_0_1 = std::fmin(src0[lane], src1[lane]);
                vdst[lane] = std::fmin(min_0_1, src2[lane]);
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_MIN3_I32::Inst_VOP3__V_MIN3_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min3_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN3_I32

    Inst_VOP3__V_MIN3_I32::~Inst_VOP3__V_MIN3_I32()
    {
    } // ~Inst_VOP3__V_MIN3_I32

    // D.i = min(S0.i, S1.i, S2.i).
    void
    Inst_VOP3__V_MIN3_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI32 min_0_1 = std::min(src0[lane], src1[lane]);
                vdst[lane] = std::min(min_0_1, src2[lane]);
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_MIN3_U32::Inst_VOP3__V_MIN3_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min3_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN3_U32

    Inst_VOP3__V_MIN3_U32::~Inst_VOP3__V_MIN3_U32()
    {
    } // ~Inst_VOP3__V_MIN3_U32

    // D.u = min(S0.u, S1.u, S2.u).
    void
    Inst_VOP3__V_MIN3_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU32 min_0_1 = std::min(src0[lane], src1[lane]);
                vdst[lane] = std::min(min_0_1, src2[lane]);
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_MAX3_F32::Inst_VOP3__V_MAX3_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max3_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MAX3_F32

    Inst_VOP3__V_MAX3_F32::~Inst_VOP3__V_MAX3_F32()
    {
    } // ~Inst_VOP3__V_MAX3_F32

    // D.f = max(S0.f, S1.f, S2.f).
- void - Inst_VOP3__V_MAX3_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF32 max_0_1 = std::fmax(src0[lane], src1[lane]); - vdst[lane] = std::fmax(max_0_1, src2[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_MAX3_I32::Inst_VOP3__V_MAX3_I32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_max3_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX3_I32 - - Inst_VOP3__V_MAX3_I32::~Inst_VOP3__V_MAX3_I32() - { - } // ~Inst_VOP3__V_MAX3_I32 - - // D.i = max(S0.i, S1.i, S2.i). 
- void - Inst_VOP3__V_MAX3_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI32 max_0_1 = std::max(src0[lane], src1[lane]); - vdst[lane] = std::max(max_0_1, src2[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_MAX3_U32::Inst_VOP3__V_MAX3_U32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_max3_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX3_U32 - - Inst_VOP3__V_MAX3_U32::~Inst_VOP3__V_MAX3_U32() - { - } // ~Inst_VOP3__V_MAX3_U32 - - // D.u = max(S0.u, S1.u, S2.u). 
- void - Inst_VOP3__V_MAX3_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU32 max_0_1 = std::max(src0[lane], src1[lane]); - vdst[lane] = std::max(max_0_1, src2[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_MED3_F32::Inst_VOP3__V_MED3_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_med3_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MED3_F32 - - Inst_VOP3__V_MED3_F32::~Inst_VOP3__V_MED3_F32() - { - } // ~Inst_VOP3__V_MED3_F32 - - // D.f = median(S0.f, S1.f, S2.f). 
    void
    Inst_VOP3__V_MED3_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        // Apply FP input modifiers: all absolute-value modifiers first,
        // then all negation modifiers (bit n selects source n).
        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        // Per-lane median of the three FP sources for active lanes.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_MED3_I32::Inst_VOP3__V_MED3_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_med3_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MED3_I32

    Inst_VOP3__V_MED3_I32::~Inst_VOP3__V_MED3_I32()
    {
    } // ~Inst_VOP3__V_MED3_I32

    // D.i = median(S0.i, S1.i, S2.i).
    void
    Inst_VOP3__V_MED3_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        // Per-lane median of the three signed-integer sources for lanes
        // enabled in the execution mask.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_MED3_U32::Inst_VOP3__V_MED3_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_med3_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MED3_U32

    Inst_VOP3__V_MED3_U32::~Inst_VOP3__V_MED3_U32()
    {
    } // ~Inst_VOP3__V_MED3_U32

    // D.u = median(S0.u, S1.u, S2.u).
- void - Inst_VOP3__V_MED3_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = median(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } - - Inst_VOP3__V_SAD_U8::Inst_VOP3__V_SAD_U8(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_sad_u8", false) - { - setFlag(ALU); - } // Inst_VOP3__V_SAD_U8 - - Inst_VOP3__V_SAD_U8::~Inst_VOP3__V_SAD_U8() - { - } // ~Inst_VOP3__V_SAD_U8 - - // D.u = abs(S0.i[31:24] - S1.i[31:24]) + abs(S0.i[23:16] - S1.i[23:16]) + - // abs(S0.i[15:8] - S1.i[15:8]) + abs(S0.i[7:0] - S1.i[7:0]) + S2.u. - // Sum of absolute differences with accumulation, overflow into upper bits - // is allowed. 
    void
    Inst_VOP3__V_SAD_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        // Sum of absolute byte differences between S0 and S1, accumulated
        // into S2, per active lane.
        // NOTE(review): the result type/width of bits() here determines
        // whether the subtraction is signed before std::abs — presumably it
        // yields the operand's (signed) element type; verify against the
        // bits() helper definition.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::abs(bits(src0[lane], 31, 24)
                    - bits(src1[lane], 31, 24))
                    + std::abs(bits(src0[lane], 23, 16)
                    - bits(src1[lane], 23, 16))
                    + std::abs(bits(src0[lane], 15, 8)
                    - bits(src1[lane], 15, 8))
                    + std::abs(bits(src0[lane], 7, 0)
                    - bits(src1[lane], 7, 0)) + src2[lane];
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_SAD_HI_U8::Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sad_hi_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SAD_HI_U8

    Inst_VOP3__V_SAD_HI_U8::~Inst_VOP3__V_SAD_HI_U8()
    {
    } // ~Inst_VOP3__V_SAD_HI_U8

    // D.u = (SAD_U8(S0, S1, 0) << 16) + S2.u.
    // Sum of absolute differences with accumulation, overflow is lost.
- void - Inst_VOP3__V_SAD_HI_U8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (((bits(src0[lane], 31, 24) - - bits(src1[lane], 31, 24)) + (bits(src0[lane], 23, 16) - - bits(src1[lane], 23, 16)) + (bits(src0[lane], 15, 8) - - bits(src1[lane], 15, 8)) + (bits(src0[lane], 7, 0) - - bits(src1[lane], 7, 0))) << 16) + src2[lane]; - } - } - - vdst.write(); - } - - Inst_VOP3__V_SAD_U16::Inst_VOP3__V_SAD_U16(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_sad_u16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_SAD_U16 - - Inst_VOP3__V_SAD_U16::~Inst_VOP3__V_SAD_U16() - { - } // ~Inst_VOP3__V_SAD_U16 - - // D.u = abs(S0.i[31:16] - S1.i[31:16]) + abs(S0.i[15:0] - S1.i[15:0]) - // + S2.u. - // Word SAD with accumulation. 
    void
    Inst_VOP3__V_SAD_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        // Sum of absolute half-word differences between S0 and S1,
        // accumulated into S2, per active lane.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::abs(bits(src0[lane], 31, 16)
                    - bits(src1[lane], 31, 16))
                    + std::abs(bits(src0[lane], 15, 0)
                    - bits(src1[lane], 15, 0)) + src2[lane];
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_SAD_U32::Inst_VOP3__V_SAD_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sad_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SAD_U32

    Inst_VOP3__V_SAD_U32::~Inst_VOP3__V_SAD_U32()
    {
    } // ~Inst_VOP3__V_SAD_U32

    // D.u = abs(S0.i - S1.i) + S2.u.
    // Dword SAD with accumulation.
- void - Inst_VOP3__V_SAD_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::abs(src0[lane] - src1[lane]) + src2[lane]; - } - } - - vdst.write(); - } - - Inst_VOP3__V_CVT_PK_U8_F32::Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_cvt_pk_u8_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_PK_U8_F32 - - Inst_VOP3__V_CVT_PK_U8_F32::~Inst_VOP3__V_CVT_PK_U8_F32() - { - } // ~Inst_VOP3__V_CVT_PK_U8_F32 - - // D.u = ((flt32_to_uint8(S0.f) & 0xff) << (8 * S1.u[1:0])) - // | (S2.u & ~(0xff << (8 * S1.u[1:0]))). - // Convert floating point value S0 to 8-bit unsigned integer and pack the - // result into byte S1 of dword S2. 
    void
    Inst_VOP3__V_CVT_PK_U8_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        // Only src0 is FP here, so only its ABS/NEG modifier bits apply.
        if (instData.ABS & 0x1) {
            src0.absModifier();
        }


        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        // Convert S0 to uint8 and insert it into byte S1.u[1:0] of S2,
        // preserving the other three bytes.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (((VecElemU8)src0[lane] & 0xff)
                    << (8 * bits(src1[lane], 1, 0)))
                    | (src2[lane] & ~(0xff << (8 * bits(src1[lane], 1, 0))));
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_DIV_FIXUP_F32::Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fixup_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_DIV_FIXUP_F32

    Inst_VOP3__V_DIV_FIXUP_F32::~Inst_VOP3__V_DIV_FIXUP_F32()
    {
    } // ~Inst_VOP3__V_DIV_FIXUP_F32

    // D.f = Divide fixup and flags -- s0.f = Quotient, s1.f = Denominator,
    // s2.f = Numerator.
    void
    Inst_VOP3__V_DIV_FIXUP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        // FP input modifiers: absolute value first, then negation.
        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        // Special-case handling for division: src1 = denominator,
        // src2 = numerator, src0 = quotient (unused except via the
        // fall-through recompute below).
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // x/0 -> signed infinity (sign of the zero denominator).
                if (std::fpclassify(src1[lane]) == FP_ZERO) {
                    if (std::signbit(src1[lane])) {
                        vdst[lane] = -INFINITY;
                    } else {
                        vdst[lane] = +INFINITY;
                    }
                // NaN numerator or denominator propagates NaN.
                } else if (std::isnan(src2[lane]) || std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                // Infinite denominator -> signed infinity.
                // NOTE(review): x/inf yielding inf (not 0) looks suspect
                // relative to the usual IEEE result — confirm against the
                // ISA's div_fixup specification.
                } else if (std::isinf(src1[lane])) {
                    if (std::signbit(src1[lane])) {
                        vdst[lane] = -INFINITY;
                    } else {
                        vdst[lane] = +INFINITY;
                    }
                // Ordinary case: recompute the quotient directly.
                } else {
                    vdst[lane] = src2[lane] / src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_DIV_FIXUP_F64 class methods ---

    Inst_VOP3__V_DIV_FIXUP_F64::Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fixup_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_DIV_FIXUP_F64

    Inst_VOP3__V_DIV_FIXUP_F64::~Inst_VOP3__V_DIV_FIXUP_F64()
    {
    } // ~Inst_VOP3__V_DIV_FIXUP_F64

    // D.d = Divide fixup and flags -- s0.d = Quotient, s1.d = Denominator,
    // s2.d = Numerator.
- void - Inst_VOP3__V_DIV_FIXUP_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int sign_out = std::signbit(src1[lane]) - ^ std::signbit(src2[lane]); - int exp1(0); - int exp2(0); - std::frexp(src1[lane], &exp1); - std::frexp(src2[lane], &exp2); - - if (std::isnan(src1[lane]) || std::isnan(src2[lane])) { - vdst[lane] = std::numeric_limits::quiet_NaN(); - } else if (std::fpclassify(src1[lane]) == FP_ZERO - && std::fpclassify(src2[lane]) == FP_ZERO) { - vdst[lane] - = std::numeric_limits::signaling_NaN(); - } else if (std::isinf(src1[lane]) && std::isinf(src2[lane])) { - vdst[lane] - = std::numeric_limits::signaling_NaN(); - } else if (std::fpclassify(src1[lane]) == FP_ZERO - || std::isinf(src2[lane])) { - vdst[lane] = sign_out ? -INFINITY : +INFINITY; - } else if (std::isinf(src1[lane]) - || std::fpclassify(src2[lane]) == FP_ZERO) { - vdst[lane] = sign_out ? -0.0 : +0.0; - } else if (exp2 - exp1 < -1075) { - vdst[lane] = src0[lane]; - } else if (exp1 == 2047) { - vdst[lane] = src0[lane]; - } else { - vdst[lane] = sign_out ? 
-std::fabs(src0[lane]) - : std::fabs(src0[lane]); - } - } - } - - vdst.write(); - } - - Inst_VOP3__V_DIV_SCALE_F32::Inst_VOP3__V_DIV_SCALE_F32( - InFmt_VOP3_SDST_ENC *iFmt) - : Inst_VOP3_SDST_ENC(iFmt, "v_div_scale_f32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(F32); - } // Inst_VOP3__V_DIV_SCALE_F32 - - Inst_VOP3__V_DIV_SCALE_F32::~Inst_VOP3__V_DIV_SCALE_F32() - { - } // ~Inst_VOP3__V_DIV_SCALE_F32 - - // {vcc,D.f} = Divide preop and flags -- s0.f = Quotient, s1.f = - // Denominator, s2.f = Numerator -- s0 must equal s1 or s2. Given a - // numerator and denominator, this opcode will appropriately scale inputs - // for division to avoid subnormal terms during Newton-Raphson correction - // algorithm. This opcode producses a VCC flag for post-scale of quotient. - void - Inst_VOP3__V_DIV_SCALE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - ScalarOperandU64 vcc(gpuDynInst, instData.SDST); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane]; - vcc.setBit(lane, 0); - } - } - - vcc.write(); - vdst.write(); - } // execute - // --- Inst_VOP3__V_DIV_SCALE_F64 class methods --- - - Inst_VOP3__V_DIV_SCALE_F64::Inst_VOP3__V_DIV_SCALE_F64( - InFmt_VOP3_SDST_ENC *iFmt) - : Inst_VOP3_SDST_ENC(iFmt, "v_div_scale_f64") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(F64); - } // Inst_VOP3__V_DIV_SCALE_F64 - - Inst_VOP3__V_DIV_SCALE_F64::~Inst_VOP3__V_DIV_SCALE_F64() - { - } // ~Inst_VOP3__V_DIV_SCALE_F64 - - // {vcc,D.d} = Divide preop and 
flags -- s0.d = Quotient, s1.d = - // Denominator, s2.d = Numerator -- s0 must equal s1 or s2. Given a - // numerator and denominator, this opcode will appropriately scale inputs - // for division to avoid subnormal terms during Newton-Raphson correction - // algorithm. This opcode producses a VCC flag for post-scale of quotient. - void - Inst_VOP3__V_DIV_SCALE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); - ScalarOperandU64 vcc(gpuDynInst, instData.SDST); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp1(0); - int exp2(0); - std::frexp(src1[lane], &exp1); - std::frexp(src2[lane], &exp2); - vcc.setBit(lane, 0); - - if (std::fpclassify(src1[lane]) == FP_ZERO - || std::fpclassify(src2[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (exp2 - exp1 >= 768) { - vcc.setBit(lane, 1); - if (src0[lane] == src1[lane]) { - vdst[lane] = std::ldexp(src0[lane], 128); - } - } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL) { - vdst[lane] = std::ldexp(src0[lane], 128); - } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL - && std::fpclassify(src2[lane] / src1[lane]) - == FP_SUBNORMAL) { - vcc.setBit(lane, 1); - if (src0[lane] == src1[lane]) { - vdst[lane] = std::ldexp(src0[lane], 128); - } - } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL) { - vdst[lane] = std::ldexp(src0[lane], -128); - } else if (std::fpclassify(src2[lane] / src1[lane]) - == FP_SUBNORMAL) { - vcc.setBit(lane, 1); - if (src0[lane] == src2[lane]) { - vdst[lane] = 
std::ldexp(src0[lane], 128); - } - } else if (exp2 <= 53) { - vdst[lane] = std::ldexp(src0[lane], 128); - } - } - } - - vcc.write(); - vdst.write(); - } - - Inst_VOP3__V_DIV_FMAS_F32::Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_div_fmas_f32", false) - { - setFlag(ALU); - setFlag(ReadsVCC); - setFlag(F32); - setFlag(FMA); - } // Inst_VOP3__V_DIV_FMAS_F32 - - Inst_VOP3__V_DIV_FMAS_F32::~Inst_VOP3__V_DIV_FMAS_F32() - { - } // ~Inst_VOP3__V_DIV_FMAS_F32 - - // D.f = Special case divide FMA with scale and flags(s0.f = Quotient, - // s1.f = Denominator, s2.f = Numerator) - void - Inst_VOP3__V_DIV_FMAS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); - } - } - - //vdst.write(); - } // execute - // --- Inst_VOP3__V_DIV_FMAS_F64 class methods --- - - Inst_VOP3__V_DIV_FMAS_F64::Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_div_fmas_f64", false) - { - setFlag(ALU); - setFlag(ReadsVCC); - setFlag(F64); - setFlag(FMA); - } // Inst_VOP3__V_DIV_FMAS_F64 - - Inst_VOP3__V_DIV_FMAS_F64::~Inst_VOP3__V_DIV_FMAS_F64() - { - } // ~Inst_VOP3__V_DIV_FMAS_F64 - - // D.d = Special case divide FMA with scale and flags(s0.d = Quotient, - // s1.d = Denominator, s2.d = Numerator) - 
void - Inst_VOP3__V_DIV_FMAS_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - vcc.read(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(vcc.rawData(), lane)) { - vdst[lane] = std::pow(2, 64) - * std::fma(src0[lane], src1[lane], src2[lane]); - } else { - vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); - } - } - } - - vdst.write(); - } - - Inst_VOP3__V_MSAD_U8::Inst_VOP3__V_MSAD_U8(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_msad_u8", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MSAD_U8 - - Inst_VOP3__V_MSAD_U8::~Inst_VOP3__V_MSAD_U8() - { - } // ~Inst_VOP3__V_MSAD_U8 - - // D.u = Masked Byte SAD with accum_lo(S0.u, S1.u, S2.u). 
    // Not implemented in the timing model; trap if encountered.
    void
    Inst_VOP3__V_MSAD_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_QSAD_PK_U16_U8::Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_qsad_pk_u16_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_QSAD_PK_U16_U8

    Inst_VOP3__V_QSAD_PK_U16_U8::~Inst_VOP3__V_QSAD_PK_U16_U8()
    {
    } // ~Inst_VOP3__V_QSAD_PK_U16_U8

    // D.u = Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
    // S1.u[31:0], S2.u[63:0])
    // Not implemented; trap if encountered.
    void
    Inst_VOP3__V_QSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_MQSAD_PK_U16_U8::Inst_VOP3__V_MQSAD_PK_U16_U8(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mqsad_pk_u16_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MQSAD_PK_U16_U8

    Inst_VOP3__V_MQSAD_PK_U16_U8::~Inst_VOP3__V_MQSAD_PK_U16_U8()
    {
    } // ~Inst_VOP3__V_MQSAD_PK_U16_U8

    // D.u = Masked Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
    // S1.u[31:0], S2.u[63:0])
    // Not implemented; trap if encountered.
    void
    Inst_VOP3__V_MQSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_MQSAD_U32_U8::Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mqsad_u32_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MQSAD_U32_U8

    Inst_VOP3__V_MQSAD_U32_U8::~Inst_VOP3__V_MQSAD_U32_U8()
    {
    } // ~Inst_VOP3__V_MQSAD_U32_U8

    // D.u128 = Masked Quad-Byte SAD with 32-bit accum_lo/hi(S0.u[63:0],
    // S1.u[31:0], S2.u[127:0])
    // Not implemented; trap if encountered.
    void
    Inst_VOP3__V_MQSAD_U32_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_MAD_U64_U32::Inst_VOP3__V_MAD_U64_U32(
          InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_mad_u64_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_U64_U32

    Inst_VOP3__V_MAD_U64_U32::~Inst_VOP3__V_MAD_U64_U32()
    {
    } // ~Inst_VOP3__V_MAD_U64_U32

    // {vcc_out, D.u64} = S0.u32 * S1.u32 + S2.u64.
    void
    Inst_VOP3__V_MAD_U64_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU64 src2(gpuDynInst, extData.SRC2);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();
        vdst.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        // 32x32 -> 64-bit multiply-add per active lane; muladd() stores the
        // 64-bit result into vdst[lane] and returns the carry-out bit,
        // which is recorded in the SDST scalar (vcc_out).
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
                    src2[lane]));
            }
        }

        vcc.write();
        vdst.write();
    }

    Inst_VOP3__V_MAD_I64_I32::Inst_VOP3__V_MAD_I64_I32(
          InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_mad_i64_i32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_I64_I32

    Inst_VOP3__V_MAD_I64_I32::~Inst_VOP3__V_MAD_I64_I32()
    {
    } // ~Inst_VOP3__V_MAD_I64_I32

    // {vcc_out,D.i64} = S0.i32 * S1.i32 + S2.i64.
    void
    Inst_VOP3__V_MAD_I64_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI64 src2(gpuDynInst, extData.SRC2);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
        VecOperandI64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        // Signed 32x32 -> 64-bit multiply-add; muladd() writes the result
        // into vdst[lane] and returns the carry-out recorded in vcc_out.
        // NOTE(review): the unsigned twin (V_MAD_U64_U32) calls vdst.read()
        // before this loop and this variant does not — if muladd() treats
        // its first argument as output-only the read is redundant there;
        // otherwise it is missing here. Confirm against muladd()'s
        // definition and make the two consistent.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
                    src2[lane]));
            }
        }

        vcc.write();
        vdst.write();
    }

    Inst_VOP3__V_MAD_F16::Inst_VOP3__V_MAD_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_F16

    Inst_VOP3__V_MAD_F16::~Inst_VOP3__V_MAD_F16()
    {
    } // ~Inst_VOP3__V_MAD_F16

    // D.f16 = S0.f16 * S1.f16 + S2.f16.
    // Supports round mode, exception flags, saturation.
    // Not implemented; trap if encountered.
    void
    Inst_VOP3__V_MAD_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_MAD_U16::Inst_VOP3__V_MAD_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_u16", false)
    {
        setFlag(ALU);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_U16

    Inst_VOP3__V_MAD_U16::~Inst_VOP3__V_MAD_U16()
    {
    } // ~Inst_VOP3__V_MAD_U16

    // D.u16 = S0.u16 * S1.u16 + S2.u16.
    // Supports saturation (unsigned 16-bit integer domain).
- void - Inst_VOP3__V_MAD_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU16 src2(gpuDynInst, extData.SRC2); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] * src1[lane] + src2[lane]; - } - } - - vdst.write(); - } - - Inst_VOP3__V_MAD_I16::Inst_VOP3__V_MAD_I16(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_mad_i16", false) - { - setFlag(ALU); - setFlag(MAD); - } // Inst_VOP3__V_MAD_I16 - - Inst_VOP3__V_MAD_I16::~Inst_VOP3__V_MAD_I16() - { - } // ~Inst_VOP3__V_MAD_I16 - - // D.i16 = S0.i16 * S1.i16 + S2.i16. - // Supports saturation (signed 16-bit integer domain). 
    void
    Inst_VOP3__V_MAD_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI16 src2(gpuDynInst, extData.SRC2);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        // Signed 16-bit multiply-add per active lane; arithmetic promotes
        // to int and is truncated back to 16 bits on assignment.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane] + src2[lane];
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_PERM_B32::Inst_VOP3__V_PERM_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_perm_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_PERM_B32

    Inst_VOP3__V_PERM_B32::~Inst_VOP3__V_PERM_B32()
    {
    } // ~Inst_VOP3__V_PERM_B32

    // D.u[31:24] = permute({S0.u, S1.u}, S2.u[31:24]);
    // D.u[23:16] = permute({S0.u, S1.u}, S2.u[23:16]);
    // D.u[15:8] = permute({S0.u, S1.u}, S2.u[15:8]);
    // D.u[7:0] = permute({S0.u, S1.u}, S2.u[7:0]);
    // byte permute(byte in[8], byte sel) {
    //     if(sel>=13) then return 0xff;
    //     elsif(sel==12) then return 0x00;
    //     elsif(sel==11) then return in[7][7] * 0xff;
    //     elsif(sel==10) then return in[5][7] * 0xff;
    //     elsif(sel==9) then return in[3][7] * 0xff;
    //     elsif(sel==8) then return in[1][7] * 0xff;
    //     else return in[sel];
    // }
    void
    Inst_VOP3__V_PERM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // The 64-bit selector input is {S0, S1}: S0 supplies the
                // high dword, S1 the low dword.
                VecElemU64 selector = (VecElemU64)src0[lane];
                selector = (selector << 32) | (VecElemU64)src1[lane];
                vdst[lane] = 0;

                DPRINTF(GCN3, "Executing v_perm_b32 src_0 0x%08x, src_1 "
                    "0x%08x, src_2 0x%08x, vdst 0x%08x\n", src0[lane],
                    src1[lane], src2[lane], vdst[lane]);
                DPRINTF(GCN3, "Selector: 0x%08x \n", selector);

                // Byte i of S2 selects which byte of {S0,S1} (or constant)
                // lands in byte i of the destination.
                for (int i = 0; i < 4 ; ++i) {
                    VecElemU32 permuted_val = permute(selector, 0xFF
                        & ((VecElemU32)src2[lane] >> (8 * i)));
                    vdst[lane] |= (permuted_val << (8 * i));
                }

                DPRINTF(GCN3, "v_perm result: 0x%08x\n", vdst[lane]);
            }
        }

        vdst.write();
    }

    Inst_VOP3__V_FMA_F16::Inst_VOP3__V_FMA_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fma_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(FMA);
    } // Inst_VOP3__V_FMA_F16

    Inst_VOP3__V_FMA_F16::~Inst_VOP3__V_FMA_F16()
    {
    } // ~Inst_VOP3__V_FMA_F16

    // D.f16 = S0.f16 * S1.f16 + S2.f16.
    // Fused half precision multiply add.
    // Not implemented; trap if encountered.
    void
    Inst_VOP3__V_FMA_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_DIV_FIXUP_F16::Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fixup_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_DIV_FIXUP_F16

    Inst_VOP3__V_DIV_FIXUP_F16::~Inst_VOP3__V_DIV_FIXUP_F16()
    {
    } // ~Inst_VOP3__V_DIV_FIXUP_F16

    // sign_out = sign(S1.f16)^sign(S2.f16);
    // if (S2.f16 == NAN)
    //     D.f16 = Quiet(S2.f16);
    // else if (S1.f16 == NAN)
    //     D.f16 = Quiet(S1.f16);
    // else if (S1.f16 == S2.f16 == 0)
    //     # 0/0
    //     D.f16 = pele_nan(0xfe00);
    // else if (abs(S1.f16) == abs(S2.f16) == +-INF)
    //     # inf/inf
    //     D.f16 = pele_nan(0xfe00);
    // else if (S1.f16 ==0 || abs(S2.f16) == +-INF)
    //     # x/0, or inf/y
    //     D.f16 = sign_out ? -INF : INF;
    // else if (abs(S1.f16) == +-INF || S2.f16 == 0)
    //     # x/inf, 0/y
    //     D.f16 = sign_out ? -0 : 0;
    // else if ((exp(S2.f16) - exp(S1.f16)) < -150)
    //     D.f16 = sign_out ? -underflow : underflow;
    // else if (exp(S1.f16) == 255)
    //     D.f16 = sign_out ? -overflow : overflow;
    // else
    //     D.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16).
    // Half precision division fixup.
    // S0 = Quotient, S1 = Denominator, S3 = Numerator.
    // Given a numerator, denominator, and quotient from a divide, this opcode
    // will detect and apply special case numerics, touching up the quotient if
    // necessary. This opcode also generates invalid, denorm and divide by
    // zero exceptions caused by the division.
    // Not implemented; trap if encountered.
    void
    Inst_VOP3__V_DIV_FIXUP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_VOP3__V_CVT_PKACCUM_U8_F32::Inst_VOP3__V_CVT_PKACCUM_U8_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pkaccum_u8_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKACCUM_U8_F32

    Inst_VOP3__V_CVT_PKACCUM_U8_F32::~Inst_VOP3__V_CVT_PKACCUM_U8_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKACCUM_U8_F32

    // byte = S1.u[1:0]; bit = byte * 8;
    // D.u[bit + 7:bit] = flt32_to_uint8(S0.f);
    // Pack converted value of S0.f into byte S1 of the destination.
    // SQ translates to V_CVT_PK_U8_F32.
    // Note: this opcode uses src_c to pass destination in as a source.
void
Inst_VOP3__V_CVT_PKACCUM_U8_F32::execute(GPUDynInstPtr gpuDynInst)
{
    // Not implemented in this model; trap if encountered.
    panicUnimplemented();
}

Inst_VOP3__V_INTERP_P1_F32::Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_interp_p1_f32", false)
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP3__V_INTERP_P1_F32

Inst_VOP3__V_INTERP_P1_F32::~Inst_VOP3__V_INTERP_P1_F32()
{
} // ~Inst_VOP3__V_INTERP_P1_F32

// D.f = P10 * S.f + P0;
void
Inst_VOP3__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
{
    // Graphics interpolation; not implemented in this model.
    panicUnimplemented();
}

Inst_VOP3__V_INTERP_P2_F32::Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_interp_p2_f32", false)
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP3__V_INTERP_P2_F32

Inst_VOP3__V_INTERP_P2_F32::~Inst_VOP3__V_INTERP_P2_F32()
{
} // ~Inst_VOP3__V_INTERP_P2_F32

// D.f = P20 * S.f + D.f;
void
Inst_VOP3__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
{
    // Graphics interpolation; not implemented in this model.
    panicUnimplemented();
}

Inst_VOP3__V_INTERP_MOV_F32::Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_interp_mov_f32", false)
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP3__V_INTERP_MOV_F32

Inst_VOP3__V_INTERP_MOV_F32::~Inst_VOP3__V_INTERP_MOV_F32()
{
} // ~Inst_VOP3__V_INTERP_MOV_F32

// D.f = {P10,P20,P0}[S.u]; parameter load.
void
Inst_VOP3__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
{
    // Graphics interpolation; not implemented in this model.
    panicUnimplemented();
}

Inst_VOP3__V_INTERP_P1LL_F16::Inst_VOP3__V_INTERP_P1LL_F16(
      InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_interp_p1ll_f16", false)
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP3__V_INTERP_P1LL_F16

Inst_VOP3__V_INTERP_P1LL_F16::~Inst_VOP3__V_INTERP_P1LL_F16()
{
} // ~Inst_VOP3__V_INTERP_P1LL_F16

// D.f32 = P10.f16 * S0.f32 + P0.f16.
void
Inst_VOP3__V_INTERP_P1LL_F16::execute(GPUDynInstPtr gpuDynInst)
{
    // Graphics interpolation; not implemented in this model.
    panicUnimplemented();
}

Inst_VOP3__V_INTERP_P1LV_F16::Inst_VOP3__V_INTERP_P1LV_F16(
      InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_interp_p1lv_f16", false)
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP3__V_INTERP_P1LV_F16

Inst_VOP3__V_INTERP_P1LV_F16::~Inst_VOP3__V_INTERP_P1LV_F16()
{
} // ~Inst_VOP3__V_INTERP_P1LV_F16

void
Inst_VOP3__V_INTERP_P1LV_F16::execute(GPUDynInstPtr gpuDynInst)
{
    // Graphics interpolation; not implemented in this model.
    panicUnimplemented();
}

Inst_VOP3__V_INTERP_P2_F16::Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_interp_p2_f16", false)
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP3__V_INTERP_P2_F16

Inst_VOP3__V_INTERP_P2_F16::~Inst_VOP3__V_INTERP_P2_F16()
{
} // ~Inst_VOP3__V_INTERP_P2_F16

// D.f16 = P20.f16 * S0.f32 + S2.f32.
void
Inst_VOP3__V_INTERP_P2_F16::execute(GPUDynInstPtr gpuDynInst)
{
    // Graphics interpolation; not implemented in this model.
    panicUnimplemented();
}

Inst_VOP3__V_ADD_F64::Inst_VOP3__V_ADD_F64(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_add_f64", false)
{
    setFlag(ALU);
    setFlag(F64);
} // Inst_VOP3__V_ADD_F64

Inst_VOP3__V_ADD_F64::~Inst_VOP3__V_ADD_F64()
{
} // ~Inst_VOP3__V_ADD_F64

// D.d = S0.d + S1.d.
- void - Inst_VOP3__V_ADD_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isnan(src0[lane]) || - std::isnan(src1[lane]) ) { - vdst[lane] = NAN; - } else if (std::isinf(src0[lane]) && - std::isinf(src1[lane])) { - if (std::signbit(src0[lane]) != - std::signbit(src1[lane])) { - vdst[lane] = NAN; - } else { - vdst[lane] = src0[lane]; - } - } else if (std::isinf(src0[lane])) { - vdst[lane] = src0[lane]; - } else if (std::isinf(src1[lane])) { - vdst[lane] = src1[lane]; - } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - if (std::signbit(src0[lane]) && - std::signbit(src1[lane])) { - vdst[lane] = -0.0; - } else { - vdst[lane] = 0.0; - } - } else { - vdst[lane] = src1[lane]; - } - } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) { - if (std::signbit(src0[lane]) && - std::signbit(src1[lane])) { - vdst[lane] = -0.0; - } else { - vdst[lane] = 0.0; - } - } else { - vdst[lane] = src0[lane]; - } - } else { - vdst[lane] = src0[lane] + src1[lane]; - } - } - } - - vdst.write(); - } - - 
Inst_VOP3__V_MUL_F64::Inst_VOP3__V_MUL_F64(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_mul_f64", false)
{
    setFlag(ALU);
    setFlag(F64);
} // Inst_VOP3__V_MUL_F64

Inst_VOP3__V_MUL_F64::~Inst_VOP3__V_MUL_F64()
{
} // ~Inst_VOP3__V_MUL_F64

// D.d = S0.d * S1.d.
// Double-precision multiply with explicit handling of NaN, zero/denormal
// (flushed to signed zero), and infinity sign combinations.
void
Inst_VOP3__V_MUL_F64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
    VecOperandF64 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    // Apply the VOP3 input modifiers (|x| and -x) per source operand.
    if (instData.ABS & 0x1) {
        src0.absModifier();
    }

    if (instData.ABS & 0x2) {
        src1.absModifier();
    }

    if (extData.NEG & 0x1) {
        src0.negModifier();
    }

    if (extData.NEG & 0x2) {
        src1.negModifier();
    }

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            if (std::isnan(src0[lane]) ||
                std::isnan(src1[lane])) {
                vdst[lane] = NAN;
            } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                       std::fpclassify(src0[lane]) == FP_ZERO) &&
                       !std::signbit(src0[lane])) {
                // +0 * inf -> NaN; otherwise sign of S1 picks +/-0.
                if (std::isinf(src1[lane])) {
                    vdst[lane] = NAN;
                } else if (!std::signbit(src1[lane])) {
                    vdst[lane] = +0.0;
                } else {
                    vdst[lane] = -0.0;
                }
            } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                       std::fpclassify(src0[lane]) == FP_ZERO) &&
                       std::signbit(src0[lane])) {
                // -0 * inf -> NaN; otherwise sign of S1 picks +/-0.
                if (std::isinf(src1[lane])) {
                    vdst[lane] = NAN;
                } else if (std::signbit(src1[lane])) {
                    vdst[lane] = +0.0;
                } else {
                    vdst[lane] = -0.0;
                }
            } else if (std::isinf(src0[lane]) &&
                       !std::signbit(src0[lane])) {
                // +inf * 0 -> NaN; otherwise signed infinity.
                if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                    std::fpclassify(src1[lane]) == FP_ZERO) {
                    vdst[lane] = NAN;
                } else if (!std::signbit(src1[lane])) {
                    vdst[lane] = +INFINITY;
                } else {
                    vdst[lane] = -INFINITY;
                }
            } else if (std::isinf(src0[lane]) &&
                       std::signbit(src0[lane])) {
                // -inf * 0 -> NaN; otherwise signed infinity.
                if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                    std::fpclassify(src1[lane]) == FP_ZERO) {
                    vdst[lane] = NAN;
                } else if (std::signbit(src1[lane])) {
                    vdst[lane] = +INFINITY;
                } else {
                    vdst[lane] = -INFINITY;
                }
            } else {
                // Normal case.
                vdst[lane] = src0[lane] * src1[lane];
            }
        }
    }

    vdst.write();
}

Inst_VOP3__V_MIN_F64::Inst_VOP3__V_MIN_F64(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_min_f64", false)
{
    setFlag(ALU);
    setFlag(F64);
} // Inst_VOP3__V_MIN_F64

Inst_VOP3__V_MIN_F64::~Inst_VOP3__V_MIN_F64()
{
} // ~Inst_VOP3__V_MIN_F64

// D.d = min(S0.d, S1.d).
void
Inst_VOP3__V_MIN_F64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
    VecOperandF64 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    // Apply the VOP3 input modifiers (|x| and -x) per source operand.
    if (instData.ABS & 0x1) {
        src0.absModifier();
    }

    if (instData.ABS & 0x2) {
        src1.absModifier();
    }

    if (extData.NEG & 0x1) {
        src0.negModifier();
    }

    if (extData.NEG & 0x2) {
        src1.negModifier();
    }

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // std::fmin returns the non-NaN operand if only one is NaN.
            vdst[lane] = std::fmin(src0[lane], src1[lane]);
        }
    }

    vdst.write();
}

Inst_VOP3__V_MAX_F64::Inst_VOP3__V_MAX_F64(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_max_f64", false)
{
    setFlag(ALU);
    setFlag(F64);
} // Inst_VOP3__V_MAX_F64

Inst_VOP3__V_MAX_F64::~Inst_VOP3__V_MAX_F64()
{
} // ~Inst_VOP3__V_MAX_F64

// D.d = max(S0.d, S1.d).
void
Inst_VOP3__V_MAX_F64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
    VecOperandF64 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    // Apply the VOP3 input modifiers (|x| and -x) per source operand.
    if (instData.ABS & 0x1) {
        src0.absModifier();
    }

    if (instData.ABS & 0x2) {
        src1.absModifier();
    }

    if (extData.NEG & 0x1) {
        src0.negModifier();
    }

    if (extData.NEG & 0x2) {
        src1.negModifier();
    }

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // std::fmax returns the non-NaN operand if only one is NaN.
            vdst[lane] = std::fmax(src0[lane], src1[lane]);
        }
    }

    vdst.write();
}

Inst_VOP3__V_LDEXP_F64::Inst_VOP3__V_LDEXP_F64(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_ldexp_f64", false)
{
    setFlag(ALU);
    setFlag(F64);
} // Inst_VOP3__V_LDEXP_F64

Inst_VOP3__V_LDEXP_F64::~Inst_VOP3__V_LDEXP_F64()
{
} // ~Inst_VOP3__V_LDEXP_F64

// D.d = S0.d * 2^S1.i[31:0] (ldexp).
// Double-precision ldexp: scale S0 by 2^S1. NaN/inf pass through and
// zero/denormal inputs flush to signed zero.
void
Inst_VOP3__V_LDEXP_F64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
    // NOTE(review): exponent operand is read as unsigned here although the
    // ISA pseudo-code treats S1 as a signed int — confirm intended.
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    VecOperandF64 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    // Modifiers only apply to the FP operand (S0).
    if (instData.ABS & 0x1) {
        src0.absModifier();
    }

    if (extData.NEG & 0x1) {
        src0.negModifier();
    }

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            if (std::isnan(src0[lane]) || std::isinf(src0[lane])) {
                vdst[lane] = src0[lane];
            } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                       || std::fpclassify(src0[lane]) == FP_ZERO) {
                if (std::signbit(src0[lane])) {
                    vdst[lane] = -0.0;
                } else {
                    vdst[lane] = +0.0;
                }
            } else {
                vdst[lane] = std::ldexp(src0[lane], src1[lane]);
            }
        }
    }

    vdst.write();
}

Inst_VOP3__V_MUL_LO_U32::Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_mul_lo_u32", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_MUL_LO_U32

Inst_VOP3__V_MUL_LO_U32::~Inst_VOP3__V_MUL_LO_U32()
{
} // ~Inst_VOP3__V_MUL_LO_U32

// D.u = S0.u * S1.u.
void
Inst_VOP3__V_MUL_LO_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // Widen to 64 bits so the full product exists, then keep
            // the low 32 bits.
            VecElemI64 s0 = (VecElemI64)src0[lane];
            VecElemI64 s1 = (VecElemI64)src1[lane];
            vdst[lane] = (VecElemU32)((s0 * s1) & 0xffffffffLL);
        }
    }

    vdst.write();
}

Inst_VOP3__V_MUL_HI_U32::Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_mul_hi_u32", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_MUL_HI_U32

Inst_VOP3__V_MUL_HI_U32::~Inst_VOP3__V_MUL_HI_U32()
{
} // ~Inst_VOP3__V_MUL_HI_U32

// D.u = (S0.u * S1.u) >> 32.
void
Inst_VOP3__V_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // Widen to 64 bits, then keep the high 32 bits of the product.
            VecElemI64 s0 = (VecElemI64)src0[lane];
            VecElemI64 s1 = (VecElemI64)src1[lane];
            vdst[lane]
                = (VecElemU32)(((s0 * s1) >> 32) & 0xffffffffLL);
        }
    }

    vdst.write();
}

Inst_VOP3__V_MUL_HI_I32::Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_mul_hi_i32", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_MUL_HI_I32

Inst_VOP3__V_MUL_HI_I32::~Inst_VOP3__V_MUL_HI_I32()
{
} // ~Inst_VOP3__V_MUL_HI_I32

// D.i = (S0.i * S1.i) >> 32.
// Signed high-half 32x32 multiply.
void
Inst_VOP3__V_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    VecOperandI32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // Sign-extend to 64 bits, then keep the high 32 bits.
            VecElemI64 s0 = (VecElemI64)src0[lane];
            VecElemI64 s1 = (VecElemI64)src1[lane];
            vdst[lane]
                = (VecElemI32)(((s0 * s1) >> 32LL) & 0xffffffffLL);
        }
    }

    vdst.write();
}

Inst_VOP3__V_LDEXP_F32::Inst_VOP3__V_LDEXP_F32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_ldexp_f32", false)
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP3__V_LDEXP_F32

Inst_VOP3__V_LDEXP_F32::~Inst_VOP3__V_LDEXP_F32()
{
} // ~Inst_VOP3__V_LDEXP_F32

// D.f = S0.f * 2^S1.i (ldexp).
void
Inst_VOP3__V_LDEXP_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    VecOperandF32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = std::ldexp(src0[lane], src1[lane]);
        }
    }

    vdst.write();
}

Inst_VOP3__V_READLANE_B32::Inst_VOP3__V_READLANE_B32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_readlane_b32", true)
{
    setFlag(ALU);
    setFlag(IgnoreExec);
} // Inst_VOP3__V_READLANE_B32

Inst_VOP3__V_READLANE_B32::~Inst_VOP3__V_READLANE_B32()
{
} // ~Inst_VOP3__V_READLANE_B32

// Copy one VGPR value to one SGPR. D = SGPR-dest, S0 = Source Data (VGPR#
// or M0(lds-direct)), S1 = Lane Select (SGPR or M0). Ignores exec mask.
// Input and output modifiers not supported; this is an untyped operation.
void
Inst_VOP3__V_READLANE_B32::execute(GPUDynInstPtr gpuDynInst)
{
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
    ScalarOperandU32 sdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.read();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    // Lane select is masked to 6 bits (0..63, wavefront width).
    sdst = src0[src1.rawData() & 0x3f];

    sdst.write();
}

Inst_VOP3__V_WRITELANE_B32::Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_writelane_b32", false)
{
    setFlag(ALU);
    setFlag(IgnoreExec);
} // Inst_VOP3__V_WRITELANE_B32

Inst_VOP3__V_WRITELANE_B32::~Inst_VOP3__V_WRITELANE_B32()
{
} // ~Inst_VOP3__V_WRITELANE_B32

// Write value into one VGPR in one lane. D = VGPR-dest, S0 = Source Data
// (sgpr, m0, exec or constants), S1 = Lane Select (SGPR or M0). Ignores
// exec mask. Input and output modifiers not supported; this is an untyped
// operation.
- void - Inst_VOP3__V_WRITELANE_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, extData.SRC0); - ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.read(); - src1.read(); - vdst.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - vdst[src1.rawData() & 0x3f] = src0.rawData(); - - vdst.write(); - } - - Inst_VOP3__V_BCNT_U32_B32::Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_bcnt_u32_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_BCNT_U32_B32 - - Inst_VOP3__V_BCNT_U32_B32::~Inst_VOP3__V_BCNT_U32_B32() - { - } // ~Inst_VOP3__V_BCNT_U32_B32 - - // D.u = CountOneBits(S0.u) + S1.u. Bit count. - void - Inst_VOP3__V_BCNT_U32_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = popCount(src0[lane]) + src1[lane]; - } - } - - vdst.write(); - } - - Inst_VOP3__V_MBCNT_LO_U32_B32::Inst_VOP3__V_MBCNT_LO_U32_B32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_mbcnt_lo_u32_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MBCNT_LO_U32_B32 - - Inst_VOP3__V_MBCNT_LO_U32_B32::~Inst_VOP3__V_MBCNT_LO_U32_B32() - { - } // ~Inst_VOP3__V_MBCNT_LO_U32_B32 - - // Masked 
bit count, ThreadPosition is the position of this thread in the - // wavefront (in 0..63). - void - Inst_VOP3__V_MBCNT_LO_U32_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - uint64_t threadMask = 0; - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - threadMask = ((1LL << lane) - 1LL); - vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) + - src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MBCNT_HI_U32_B32 class methods --- - - Inst_VOP3__V_MBCNT_HI_U32_B32::Inst_VOP3__V_MBCNT_HI_U32_B32( - InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_mbcnt_hi_u32_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MBCNT_HI_U32_B32 - - Inst_VOP3__V_MBCNT_HI_U32_B32::~Inst_VOP3__V_MBCNT_HI_U32_B32() - { - } // ~Inst_VOP3__V_MBCNT_HI_U32_B32 - - // ThreadMask = (1 << ThreadPosition) - 1; - // D.u = CountOneBits(S0.u & ThreadMask[63:32]) + S1.u. - // Masked bit count, ThreadPosition is the position of this thread in the - // wavefront (in 0..63). 
- void - Inst_VOP3__V_MBCNT_HI_U32_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - uint64_t threadMask = 0; - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - threadMask = ((1LL << lane) - 1LL); - vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) + - src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LSHLREV_B64 class methods --- - - Inst_VOP3__V_LSHLREV_B64::Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3 *iFmt) - : Inst_VOP3(iFmt, "v_lshlrev_b64", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHLREV_B64 - - Inst_VOP3__V_LSHLREV_B64::~Inst_VOP3__V_LSHLREV_B64() - { - } // ~Inst_VOP3__V_LSHLREV_B64 - - // D.u64 = S1.u64 << S0.u[5:0]. 
// 64-bit logical shift left; shift count comes from S0[5:0] (reversed
// operand order relative to non-REV shifts).
void
Inst_VOP3__V_LSHLREV_B64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
    VecOperandU64 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = src1[lane] << bits(src0[lane], 5, 0);
        }
    }

    vdst.write();
}

Inst_VOP3__V_LSHRREV_B64::Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_lshrrev_b64", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_LSHRREV_B64

Inst_VOP3__V_LSHRREV_B64::~Inst_VOP3__V_LSHRREV_B64()
{
} // ~Inst_VOP3__V_LSHRREV_B64

// D.u64 = S1.u64 >> S0.u[5:0].
// The vacated bits are set to zero.
void
Inst_VOP3__V_LSHRREV_B64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
    VecOperandU64 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // Unsigned source type gives the required logical shift.
            vdst[lane] = src1[lane] >> bits(src0[lane], 5, 0);
        }
    }

    vdst.write();
}

Inst_VOP3__V_ASHRREV_I64::Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_ashrrev_i64", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_ASHRREV_I64

Inst_VOP3__V_ASHRREV_I64::~Inst_VOP3__V_ASHRREV_I64()
{
} // ~Inst_VOP3__V_ASHRREV_I64

// D.u64 = signext(S1.u64) >> S0.u[5:0].
// The vacated bits are set to the sign bit of the input value.
// 64-bit arithmetic shift right; the signed source type of src1 provides
// the sign-extending shift.
void
Inst_VOP3__V_ASHRREV_I64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
    VecOperandU64 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane]
                = src1[lane] >> bits(src0[lane], 5, 0);
        }
    }

    vdst.write();
}

Inst_VOP3__V_TRIG_PREOP_F64::Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_trig_preop_f64", false)
{
    setFlag(ALU);
    setFlag(F64);
} // Inst_VOP3__V_TRIG_PREOP_F64

Inst_VOP3__V_TRIG_PREOP_F64::~Inst_VOP3__V_TRIG_PREOP_F64()
{
} // ~Inst_VOP3__V_TRIG_PREOP_F64

void
Inst_VOP3__V_TRIG_PREOP_F64::execute(GPUDynInstPtr gpuDynInst)
{
    // Not implemented in this model; trap if encountered.
    panicUnimplemented();
}

Inst_VOP3__V_BFM_B32::Inst_VOP3__V_BFM_B32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_bfm_b32", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_BFM_B32

Inst_VOP3__V_BFM_B32::~Inst_VOP3__V_BFM_B32()
{
} // ~Inst_VOP3__V_BFM_B32

// D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0];
// Bitfield mask: S0 gives the width, S1 gives the offset.
void
Inst_VOP3__V_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = ((1 << bits(src0[lane], 4, 0)) - 1)
                << bits(src1[lane], 4, 0);
        }
    }

    vdst.write();
}

Inst_VOP3__V_CVT_PKNORM_I16_F32::Inst_VOP3__V_CVT_PKNORM_I16_F32(
      InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_pknorm_i16_f32", false)
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP3__V_CVT_PKNORM_I16_F32

Inst_VOP3__V_CVT_PKNORM_I16_F32::~Inst_VOP3__V_CVT_PKNORM_I16_F32()
{
} // ~Inst_VOP3__V_CVT_PKNORM_I16_F32

// D = {(snorm)S1.f, (snorm)S0.f}.
void
Inst_VOP3__V_CVT_PKNORM_I16_F32::execute(GPUDynInstPtr gpuDynInst)
{
    // Not implemented in this model; trap if encountered.
    panicUnimplemented();
}

Inst_VOP3__V_CVT_PKNORM_U16_F32::Inst_VOP3__V_CVT_PKNORM_U16_F32(
      InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_pknorm_u16_f32", false)
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP3__V_CVT_PKNORM_U16_F32

Inst_VOP3__V_CVT_PKNORM_U16_F32::~Inst_VOP3__V_CVT_PKNORM_U16_F32()
{
} // ~Inst_VOP3__V_CVT_PKNORM_U16_F32

// D = {(unorm)S1.f, (unorm)S0.f}.
void
Inst_VOP3__V_CVT_PKNORM_U16_F32::execute(GPUDynInstPtr gpuDynInst)
{
    // Not implemented in this model; trap if encountered.
    panicUnimplemented();
}

Inst_VOP3__V_CVT_PKRTZ_F16_F32::Inst_VOP3__V_CVT_PKRTZ_F16_F32(
      InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_pkrtz_f16_f32", false)
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP3__V_CVT_PKRTZ_F16_F32

Inst_VOP3__V_CVT_PKRTZ_F16_F32::~Inst_VOP3__V_CVT_PKRTZ_F16_F32()
{
} // ~Inst_VOP3__V_CVT_PKRTZ_F16_F32

void
Inst_VOP3__V_CVT_PKRTZ_F16_F32::execute(GPUDynInstPtr gpuDynInst)
{
    // Not implemented in this model; trap if encountered.
    panicUnimplemented();
}

Inst_VOP3__V_CVT_PK_U16_U32::Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_pk_u16_u32", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_CVT_PK_U16_U32

Inst_VOP3__V_CVT_PK_U16_U32::~Inst_VOP3__V_CVT_PK_U16_U32()
{
} // ~Inst_VOP3__V_CVT_PK_U16_U32

// D = {uint32_to_uint16(S1.u), uint32_to_uint16(S0.u)}.
void
Inst_VOP3__V_CVT_PK_U16_U32::execute(GPUDynInstPtr gpuDynInst)
{
    // Not implemented in this model; trap if encountered.
    panicUnimplemented();
}

Inst_VOP3__V_CVT_PK_I16_I32::Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3 *iFmt)
    : Inst_VOP3(iFmt, "v_cvt_pk_i16_i32", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_CVT_PK_I16_I32

Inst_VOP3__V_CVT_PK_I16_I32::~Inst_VOP3__V_CVT_PK_I16_I32()
{
} // ~Inst_VOP3__V_CVT_PK_I16_I32

// D = {int32_to_int16(S1.i), int32_to_int16(S0.i)}.
void
Inst_VOP3__V_CVT_PK_I16_I32::execute(GPUDynInstPtr gpuDynInst)
{
    // Not implemented in this model; trap if encountered.
    panicUnimplemented();
}

Inst_DS__DS_ADD_U32::Inst_DS__DS_ADD_U32(InFmt_DS *iFmt)
    : Inst_DS(iFmt, "ds_add_u32")
{
} // Inst_DS__DS_ADD_U32

Inst_DS__DS_ADD_U32::~Inst_DS__DS_ADD_U32()
{
} // ~Inst_DS__DS_ADD_U32

// tmp = MEM[ADDR];
// MEM[ADDR] += DATA;
// RETURN_DATA = tmp.
void
Inst_DS__DS_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
{
    // LDS atomic; not implemented in this model.
    panicUnimplemented();
}

Inst_DS__DS_SUB_U32::Inst_DS__DS_SUB_U32(InFmt_DS *iFmt)
    : Inst_DS(iFmt, "ds_sub_u32")
{
} // Inst_DS__DS_SUB_U32

Inst_DS__DS_SUB_U32::~Inst_DS__DS_SUB_U32()
{
} // ~Inst_DS__DS_SUB_U32

// tmp = MEM[ADDR];
// MEM[ADDR] -= DATA;
// RETURN_DATA = tmp.
void
Inst_DS__DS_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
{
    // LDS atomic; not implemented in this model.
    panicUnimplemented();
}

Inst_DS__DS_RSUB_U32::Inst_DS__DS_RSUB_U32(InFmt_DS *iFmt)
    : Inst_DS(iFmt, "ds_rsub_u32")
{
} // Inst_DS__DS_RSUB_U32

Inst_DS__DS_RSUB_U32::~Inst_DS__DS_RSUB_U32()
{
} // ~Inst_DS__DS_RSUB_U32

// tmp = MEM[ADDR];
// MEM[ADDR] = DATA - MEM[ADDR];
// RETURN_DATA = tmp.
// Subtraction with reversed operands.
void
Inst_DS__DS_RSUB_U32::execute(GPUDynInstPtr gpuDynInst)
{
    // LDS atomic; not implemented in this model.
    panicUnimplemented();
}

Inst_DS__DS_INC_U32::Inst_DS__DS_INC_U32(InFmt_DS *iFmt)
    : Inst_DS(iFmt, "ds_inc_u32")
{
} // Inst_DS__DS_INC_U32

Inst_DS__DS_INC_U32::~Inst_DS__DS_INC_U32()
{
} // ~Inst_DS__DS_INC_U32

// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
// RETURN_DATA = tmp.
void
Inst_DS__DS_INC_U32::execute(GPUDynInstPtr gpuDynInst)
{
    // LDS atomic; not implemented in this model.
    panicUnimplemented();
}

Inst_DS__DS_DEC_U32::Inst_DS__DS_DEC_U32(InFmt_DS *iFmt)
    : Inst_DS(iFmt, "ds_dec_u32")
{
} // Inst_DS__DS_DEC_U32

Inst_DS__DS_DEC_U32::~Inst_DS__DS_DEC_U32()
{
} // ~Inst_DS__DS_DEC_U32

// tmp = MEM[ADDR];
// MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
// (unsigned compare); RETURN_DATA = tmp.
void
Inst_DS__DS_DEC_U32::execute(GPUDynInstPtr gpuDynInst)
{
    // LDS atomic; not implemented in this model.
    panicUnimplemented();
}

Inst_DS__DS_MIN_I32::Inst_DS__DS_MIN_I32(InFmt_DS *iFmt)
    : Inst_DS(iFmt, "ds_min_i32")
{
} // Inst_DS__DS_MIN_I32

Inst_DS__DS_MIN_I32::~Inst_DS__DS_MIN_I32()
{
} // ~Inst_DS__DS_MIN_I32

// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
// RETURN_DATA = tmp.
void
Inst_DS__DS_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
{
    // LDS atomic; not implemented in this model.
    panicUnimplemented();
}

Inst_DS__DS_MAX_I32::Inst_DS__DS_MAX_I32(InFmt_DS *iFmt)
    : Inst_DS(iFmt, "ds_max_i32")
{
} // Inst_DS__DS_MAX_I32

Inst_DS__DS_MAX_I32::~Inst_DS__DS_MAX_I32()
{
} // ~Inst_DS__DS_MAX_I32

// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
// RETURN_DATA = tmp.
void
Inst_DS__DS_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
{
    // LDS atomic; not implemented in this model.
    panicUnimplemented();
}

Inst_DS__DS_MIN_U32::Inst_DS__DS_MIN_U32(InFmt_DS *iFmt)
    : Inst_DS(iFmt, "ds_min_u32")
{
} // Inst_DS__DS_MIN_U32

Inst_DS__DS_MIN_U32::~Inst_DS__DS_MIN_U32()
{
} // ~Inst_DS__DS_MIN_U32

// tmp = MEM[ADDR];
// MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
// RETURN_DATA = tmp.
- void - Inst_DS__DS_MIN_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MAX_U32::Inst_DS__DS_MAX_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_u32") - { - } // Inst_DS__DS_MAX_U32 - - Inst_DS__DS_MAX_U32::~Inst_DS__DS_MAX_U32() - { - } // ~Inst_DS__DS_MAX_U32 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_DS__DS_MAX_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_AND_B32::Inst_DS__DS_AND_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_and_b32") - { - } // Inst_DS__DS_AND_B32 - - Inst_DS__DS_AND_B32::~Inst_DS__DS_AND_B32() - { - } // ~Inst_DS__DS_AND_B32 - - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA; - // RETURN_DATA = tmp. - void - Inst_DS__DS_AND_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_OR_B32::Inst_DS__DS_OR_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_or_b32") - { - } // Inst_DS__DS_OR_B32 - - Inst_DS__DS_OR_B32::~Inst_DS__DS_OR_B32() - { - } // ~Inst_DS__DS_OR_B32 - - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA; - // RETURN_DATA = tmp. - void - Inst_DS__DS_OR_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_XOR_B32::Inst_DS__DS_XOR_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_xor_b32") - { - } // Inst_DS__DS_XOR_B32 - - Inst_DS__DS_XOR_B32::~Inst_DS__DS_XOR_B32() - { - } // ~Inst_DS__DS_XOR_B32 - - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA; - // RETURN_DATA = tmp. - void - Inst_DS__DS_XOR_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MSKOR_B32::Inst_DS__DS_MSKOR_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_mskor_b32") - { - } // Inst_DS__DS_MSKOR_B32 - - Inst_DS__DS_MSKOR_B32::~Inst_DS__DS_MSKOR_B32() - { - } // ~Inst_DS__DS_MSKOR_B32 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; - // RETURN_DATA = tmp. 
- void - Inst_DS__DS_MSKOR_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_WRITE_B32::Inst_DS__DS_WRITE_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_b32") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_B32 - - Inst_DS__DS_WRITE_B32::~Inst_DS__DS_WRITE_B32() - { - } // ~Inst_DS__DS_WRITE_B32 - - // MEM[ADDR] = DATA. - // Write dword. - void - Inst_DS__DS_WRITE_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data(gpuDynInst, extData.DATA0); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } - - void - Inst_DS__DS_WRITE_B32::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemWrite(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_WRITE_B32::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_DS__DS_WRITE2_B32::Inst_DS__DS_WRITE2_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write2_b32") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE2_B32 - - Inst_DS__DS_WRITE2_B32::~Inst_DS__DS_WRITE2_B32() - { - } // ~Inst_DS__DS_WRITE2_B32 - - // MEM[ADDR_BASE + OFFSET0 * 4] = DATA; - // MEM[ADDR_BASE + OFFSET1 * 4] = DATA2. - // Write 2 dwords. 
- void - Inst_DS__DS_WRITE2_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); - ConstVecOperandU32 data1(gpuDynInst, extData.DATA1); - - addr.read(); - data0.read(); - data1.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - (reinterpret_cast(gpuDynInst->d_data))[lane * 2] - = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } - - void - Inst_DS__DS_WRITE2_B32::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0 * 4; - Addr offset1 = instData.OFFSET1 * 4; - - initDualMemWrite(gpuDynInst, offset0, offset1); - } - - void - Inst_DS__DS_WRITE2_B32::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_DS__DS_WRITE2ST64_B32::Inst_DS__DS_WRITE2ST64_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write2st64_b32") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE2ST64_B32 - - Inst_DS__DS_WRITE2ST64_B32::~Inst_DS__DS_WRITE2ST64_B32() - { - } // ~Inst_DS__DS_WRITE2ST64_B32 - - // MEM[ADDR_BASE + OFFSET0 * 4 * 64] = DATA; - // MEM[ADDR_BASE + OFFSET1 * 4 * 64] = DATA2; - // Write 2 dwords. 
- void - Inst_DS__DS_WRITE2ST64_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); - ConstVecOperandU32 data1(gpuDynInst, extData.DATA1); - - addr.read(); - data0.read(); - data1.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - (reinterpret_cast(gpuDynInst->d_data))[lane * 2] - = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0 * 4 * 64; - Addr offset1 = instData.OFFSET1 * 4 * 64; - - initDualMemWrite(gpuDynInst, offset0, offset1); - } - - void - Inst_DS__DS_WRITE2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - // --- Inst_DS__DS_CMPST_B32 class methods --- - - Inst_DS__DS_CMPST_B32::Inst_DS__DS_CMPST_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_b32") - { - } // Inst_DS__DS_CMPST_B32 - - Inst_DS__DS_CMPST_B32::~Inst_DS__DS_CMPST_B32() - { - } // ~Inst_DS__DS_CMPST_B32 - - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - // Compare and store. 
- void - Inst_DS__DS_CMPST_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_CMPST_F32::Inst_DS__DS_CMPST_F32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_f32") - { - setFlag(F32); - } // Inst_DS__DS_CMPST_F32 - - Inst_DS__DS_CMPST_F32::~Inst_DS__DS_CMPST_F32() - { - } // ~Inst_DS__DS_CMPST_F32 - - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - void - Inst_DS__DS_CMPST_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MIN_F32::Inst_DS__DS_MIN_F32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_f32") - { - setFlag(F32); - } // Inst_DS__DS_MIN_F32 - - Inst_DS__DS_MIN_F32::~Inst_DS__DS_MIN_F32() - { - } // ~Inst_DS__DS_MIN_F32 - - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (cmp < tmp) ? src : tmp. - void - Inst_DS__DS_MIN_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MAX_F32::Inst_DS__DS_MAX_F32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_f32") - { - setFlag(F32); - } // Inst_DS__DS_MAX_F32 - - Inst_DS__DS_MAX_F32::~Inst_DS__DS_MAX_F32() - { - } // ~Inst_DS__DS_MAX_F32 - - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (tmp > cmp) ? src : tmp. - void - Inst_DS__DS_MAX_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_NOP::Inst_DS__DS_NOP(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_nop") - { - setFlag(Nop); - } // Inst_DS__DS_NOP - - Inst_DS__DS_NOP::~Inst_DS__DS_NOP() - { - } // ~Inst_DS__DS_NOP - - // Do nothing. - void - Inst_DS__DS_NOP::execute(GPUDynInstPtr gpuDynInst) - { - } - - Inst_DS__DS_ADD_F32::Inst_DS__DS_ADD_F32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_add_f32") - { - setFlag(F32); - } // Inst_DS__DS_ADD_F32 - - Inst_DS__DS_ADD_F32::~Inst_DS__DS_ADD_F32() - { - } // ~Inst_DS__DS_ADD_F32 - - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA; - // RETURN_DATA = tmp. 
- void - Inst_DS__DS_ADD_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_WRITE_B8::Inst_DS__DS_WRITE_B8(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_b8") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_B8 - - Inst_DS__DS_WRITE_B8::~Inst_DS__DS_WRITE_B8() - { - } // ~Inst_DS__DS_WRITE_B8 - - // MEM[ADDR] = DATA[7:0]. - void - Inst_DS__DS_WRITE_B8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU8 data(gpuDynInst, extData.DATA0); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE_B8::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemWrite(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_WRITE_B8::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_DS__DS_WRITE_B16 class methods --- - - Inst_DS__DS_WRITE_B16::Inst_DS__DS_WRITE_B16(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_b16") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_B16 - - Inst_DS__DS_WRITE_B16::~Inst_DS__DS_WRITE_B16() - { - } // ~Inst_DS__DS_WRITE_B16 - - // MEM[ADDR] = DATA[15:0] - void - Inst_DS__DS_WRITE_B16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - 
gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU16 data(gpuDynInst, extData.DATA0); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE_B16::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemWrite(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_WRITE_B16::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_DS__DS_ADD_RTN_U32 class methods --- - - Inst_DS__DS_ADD_RTN_U32::Inst_DS__DS_ADD_RTN_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_add_rtn_u32") - { - } // Inst_DS__DS_ADD_RTN_U32 - - Inst_DS__DS_ADD_RTN_U32::~Inst_DS__DS_ADD_RTN_U32() - { - } // ~Inst_DS__DS_ADD_RTN_U32 - - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA; - // RETURN_DATA = tmp. - void - Inst_DS__DS_ADD_RTN_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_SUB_RTN_U32::Inst_DS__DS_SUB_RTN_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_sub_rtn_u32") - { - } // Inst_DS__DS_SUB_RTN_U32 - - Inst_DS__DS_SUB_RTN_U32::~Inst_DS__DS_SUB_RTN_U32() - { - } // ~Inst_DS__DS_SUB_RTN_U32 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA; - // RETURN_DATA = tmp. 
    // NOTE: all DS "*_rtn" atomics below are unimplemented; executing any
    // of them calls panicUnimplemented() and terminates simulation.
    void
    Inst_DS__DS_SUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_RSUB_RTN_U32::Inst_DS__DS_RSUB_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_rtn_u32")
    {
    } // Inst_DS__DS_RSUB_RTN_U32

    Inst_DS__DS_RSUB_RTN_U32::~Inst_DS__DS_RSUB_RTN_U32()
    {
    } // ~Inst_DS__DS_RSUB_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA - MEM[ADDR];
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_RSUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_INC_RTN_U32::Inst_DS__DS_INC_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_rtn_u32")
    {
    } // Inst_DS__DS_INC_RTN_U32

    Inst_DS__DS_INC_RTN_U32::~Inst_DS__DS_INC_RTN_U32()
    {
    } // ~Inst_DS__DS_INC_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_INC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_DEC_RTN_U32::Inst_DS__DS_DEC_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_rtn_u32")
    {
    } // Inst_DS__DS_DEC_RTN_U32

    Inst_DS__DS_DEC_RTN_U32::~Inst_DS__DS_DEC_RTN_U32()
    {
    } // ~Inst_DS__DS_DEC_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    void
    Inst_DS__DS_DEC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_RTN_I32::Inst_DS__DS_MIN_RTN_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_i32")
    {
    } // Inst_DS__DS_MIN_RTN_I32

    Inst_DS__DS_MIN_RTN_I32::~Inst_DS__DS_MIN_RTN_I32()
    {
    } // ~Inst_DS__DS_MIN_RTN_I32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_RTN_I32::Inst_DS__DS_MAX_RTN_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_i32")
    {
    } // Inst_DS__DS_MAX_RTN_I32

    Inst_DS__DS_MAX_RTN_I32::~Inst_DS__DS_MAX_RTN_I32()
    {
    } // ~Inst_DS__DS_MAX_RTN_I32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MAX_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_RTN_U32::Inst_DS__DS_MIN_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_u32")
    {
    } // Inst_DS__DS_MIN_RTN_U32

    Inst_DS__DS_MIN_RTN_U32::~Inst_DS__DS_MIN_RTN_U32()
    {
    } // ~Inst_DS__DS_MIN_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_RTN_U32::Inst_DS__DS_MAX_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_u32")
    {
    } // Inst_DS__DS_MAX_RTN_U32

    Inst_DS__DS_MAX_RTN_U32::~Inst_DS__DS_MAX_RTN_U32()
    {
    } // ~Inst_DS__DS_MAX_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MAX_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_AND_RTN_B32::Inst_DS__DS_AND_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_rtn_b32")
    {
    } // Inst_DS__DS_AND_RTN_B32

    Inst_DS__DS_AND_RTN_B32::~Inst_DS__DS_AND_RTN_B32()
    {
    } // ~Inst_DS__DS_AND_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_AND_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_OR_RTN_B32::Inst_DS__DS_OR_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_rtn_b32")
    {
    } // Inst_DS__DS_OR_RTN_B32

    Inst_DS__DS_OR_RTN_B32::~Inst_DS__DS_OR_RTN_B32()
    {
    } // ~Inst_DS__DS_OR_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_OR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_XOR_RTN_B32::Inst_DS__DS_XOR_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_rtn_b32")
    {
    } // Inst_DS__DS_XOR_RTN_B32

    Inst_DS__DS_XOR_RTN_B32::~Inst_DS__DS_XOR_RTN_B32()
    {
    } // ~Inst_DS__DS_XOR_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_XOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MSKOR_RTN_B32::Inst_DS__DS_MSKOR_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_rtn_b32")
    {
    } // Inst_DS__DS_MSKOR_RTN_B32

    Inst_DS__DS_MSKOR_RTN_B32::~Inst_DS__DS_MSKOR_RTN_B32()
    {
    } // ~Inst_DS__DS_MSKOR_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MSKOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRXCHG_RTN_B32::Inst_DS__DS_WRXCHG_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg_rtn_b32")
    {
    } // Inst_DS__DS_WRXCHG_RTN_B32

    Inst_DS__DS_WRXCHG_RTN_B32::~Inst_DS__DS_WRXCHG_RTN_B32()
    {
    } // ~Inst_DS__DS_WRXCHG_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    // Write-exchange operation.
    void
    Inst_DS__DS_WRXCHG_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRXCHG2_RTN_B32::Inst_DS__DS_WRXCHG2_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2_rtn_b32")
    {
    } // Inst_DS__DS_WRXCHG2_RTN_B32

    Inst_DS__DS_WRXCHG2_RTN_B32::~Inst_DS__DS_WRXCHG2_RTN_B32()
    {
    } // ~Inst_DS__DS_WRXCHG2_RTN_B32

    // Write-exchange 2 separate dwords.
    void
    Inst_DS__DS_WRXCHG2_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRXCHG2ST64_RTN_B32::Inst_DS__DS_WRXCHG2ST64_RTN_B32(
          InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b32")
    {
    } // Inst_DS__DS_WRXCHG2ST64_RTN_B32

    Inst_DS__DS_WRXCHG2ST64_RTN_B32::~Inst_DS__DS_WRXCHG2ST64_RTN_B32()
    {
    } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B32

    // Write-exchange 2 separate dwords with a stride of 64 dwords.
    void
    Inst_DS__DS_WRXCHG2ST64_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_CMPST_RTN_B32::Inst_DS__DS_CMPST_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_b32")
    {
    } // Inst_DS__DS_CMPST_RTN_B32

    Inst_DS__DS_CMPST_RTN_B32::~Inst_DS__DS_CMPST_RTN_B32()
    {
    } // ~Inst_DS__DS_CMPST_RTN_B32

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Compare and store.
    void
    Inst_DS__DS_CMPST_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_CMPST_RTN_F32::Inst_DS__DS_CMPST_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_CMPST_RTN_F32

    Inst_DS__DS_CMPST_RTN_F32::~Inst_DS__DS_CMPST_RTN_F32()
    {
    } // ~Inst_DS__DS_CMPST_RTN_F32

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_DS__DS_CMPST_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_RTN_F32::Inst_DS__DS_MIN_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MIN_RTN_F32

    Inst_DS__DS_MIN_RTN_F32::~Inst_DS__DS_MIN_RTN_F32()
    {
    } // ~Inst_DS__DS_MIN_RTN_F32

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    void
    Inst_DS__DS_MIN_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_RTN_F32::Inst_DS__DS_MAX_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MAX_RTN_F32

    Inst_DS__DS_MAX_RTN_F32::~Inst_DS__DS_MAX_RTN_F32()
    {
    } // ~Inst_DS__DS_MAX_RTN_F32

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    void
    Inst_DS__DS_MAX_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRAP_RTN_B32::Inst_DS__DS_WRAP_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrap_rtn_b32")
    {
    } // Inst_DS__DS_WRAP_RTN_B32

    Inst_DS__DS_WRAP_RTN_B32::~Inst_DS__DS_WRAP_RTN_B32()
    {
    } // ~Inst_DS__DS_WRAP_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? tmp - DATA : tmp + DATA2;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_WRAP_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_ADD_RTN_F32::Inst_DS__DS_ADD_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_ADD_RTN_F32

    Inst_DS__DS_ADD_RTN_F32::~Inst_DS__DS_ADD_RTN_F32()
    {
    } // ~Inst_DS__DS_ADD_RTN_F32

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
- void - Inst_DS__DS_ADD_RTN_F32::execute(GPUDynInstPtr gpuDynInst) - { - } - - Inst_DS__DS_READ_B32::Inst_DS__DS_READ_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_b32") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_B32 - - Inst_DS__DS_READ_B32::~Inst_DS__DS_READ_B32() - { - } // ~Inst_DS__DS_READ_B32 - - // RETURN_DATA = MEM[ADDR]. - // Dword read. - void - Inst_DS__DS_READ_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } - - void - Inst_DS__DS_READ_B32::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_B32::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } // completeAcc - - Inst_DS__DS_READ2_B32::Inst_DS__DS_READ2_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read2_b32") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ2_B32 - - Inst_DS__DS_READ2_B32::~Inst_DS__DS_READ2_B32() - { - } // ~Inst_DS__DS_READ2_B32 - - // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4]; - // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4]. - // Read 2 dwords. 
- void - Inst_DS__DS_READ2_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } - - void - Inst_DS__DS_READ2_B32::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0 * 4; - Addr offset1 = instData.OFFSET1 * 4; - - initDualMemRead(gpuDynInst, offset0, offset1); - } // initiateAcc - - void - Inst_DS__DS_READ2_B32::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]; - } - } - - vdst0.write(); - vdst1.write(); - } // completeAcc - - Inst_DS__DS_READ2ST64_B32::Inst_DS__DS_READ2ST64_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read2st64_b32") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ2ST64_B32 - - Inst_DS__DS_READ2ST64_B32::~Inst_DS__DS_READ2ST64_B32() - { - } // ~Inst_DS__DS_READ2ST64_B32 - - // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4 * 64]; - // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4 * 64]. - // Read 2 dwords. 
- void - Inst_DS__DS_READ2ST64_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = (instData.OFFSET0 * 4 * 64); - Addr offset1 = (instData.OFFSET1 * 4 * 64); - - initDualMemRead(gpuDynInst, offset0, offset1); - } - - void - Inst_DS__DS_READ2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]; - } - } - - vdst0.write(); - vdst1.write(); - } - // --- Inst_DS__DS_READ_I8 class methods --- - - Inst_DS__DS_READ_I8::Inst_DS__DS_READ_I8(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_i8") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_I8 - - Inst_DS__DS_READ_I8::~Inst_DS__DS_READ_I8() - { - } // ~Inst_DS__DS_READ_I8 - - // RETURN_DATA = signext(MEM[ADDR][7:0]). - // Signed byte read. - void - Inst_DS__DS_READ_I8::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_READ_U8::Inst_DS__DS_READ_U8(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_u8") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_U8 - - Inst_DS__DS_READ_U8::~Inst_DS__DS_READ_U8() - { - } // ~Inst_DS__DS_READ_U8 - - // RETURN_DATA = {24'h0,MEM[ADDR][7:0]}. - // Unsigned byte read. 
- void - Inst_DS__DS_READ_U8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ_U8::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_U8::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (VecElemU32)(reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } // completeAcc - // --- Inst_DS__DS_READ_I16 class methods --- - - Inst_DS__DS_READ_I16::Inst_DS__DS_READ_I16(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_i16") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_I16 - - Inst_DS__DS_READ_I16::~Inst_DS__DS_READ_I16() - { - } // ~Inst_DS__DS_READ_I16 - - // RETURN_DATA = signext(MEM[ADDR][15:0]). - // Signed short read. - void - Inst_DS__DS_READ_I16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_READ_U16::Inst_DS__DS_READ_U16(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_u16") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_U16 - - Inst_DS__DS_READ_U16::~Inst_DS__DS_READ_U16() - { - } // ~Inst_DS__DS_READ_U16 - - // RETURN_DATA = {16'h0,MEM[ADDR][15:0]}. - // Unsigned short read. 
- void - Inst_DS__DS_READ_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - void - Inst_DS__DS_READ_U16::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_U16::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (VecElemU32)(reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } // completeAcc - // --- Inst_DS__DS_SWIZZLE_B32 class methods --- - - Inst_DS__DS_SWIZZLE_B32::Inst_DS__DS_SWIZZLE_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_swizzle_b32") - { - setFlag(Load); - } // Inst_DS__DS_SWIZZLE_B32 - - Inst_DS__DS_SWIZZLE_B32::~Inst_DS__DS_SWIZZLE_B32() - { - } // ~Inst_DS__DS_SWIZZLE_B32 - - // RETURN_DATA = swizzle(vgpr_data, offset1:offset0). 
    // Dword swizzle, no data is written to LDS memory;
    void
    Inst_DS__DS_SWIZZLE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        // No LDS request is ever issued, so undo the read-request
        // bookkeeping done at issue time.
        wf->rdLmReqsInPipe--;
        wf->validateRequestCounters();

        if (gpuDynInst->exec_mask.none()) {
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()
                                ->cyclesToTicks(Cycles(24)));

        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
        VecOperandU32 vdst(gpuDynInst, extData.VDST);
        /**
         * The "DS pattern" is comprised of both offset fields. That is, the
         * swizzle pattern between lanes. Bit 15 of the DS pattern dictates
         * which swizzle mode to use. There are two different swizzle
         * patterns: 1) QDMode and 2) Bit-masks mode. If bit 15 is set use
         * QDMode else use Bit-masks mode. The remaining bits dictate how to
         * swizzle the lanes.
         *
         * QDMode:    Chunks the lanes into 4s and swizzles among them.
         *            Bits 7:6 dictate where lane 3 (of the current chunk)
         *            gets its date, 5:4 lane 2, etc.
         *
         * Bit-mask:  This mode breaks bits 14:0 into 3 equal-sized chunks.
         *            14:10 is the xor_mask, 9:5 is the or_mask, and 4:0
         *            is the and_mask. Each lane is swizzled by performing
         *            the appropriate operation using these masks.
         */
        VecElemU16 ds_pattern = ((instData.OFFSET1 << 8) | instData.OFFSET0);

        data.read();

        if (bits(ds_pattern, 15)) {
            // QDMode
            for (int lane = 0; lane < NumVecElemPerVecReg; lane += 4) {
                /**
                 * This operation allows data sharing between groups
                 * of four consecutive threads. Note the increment by
                 * 4 in the for loop.
                 */
                if (gpuDynInst->exec_mask[lane]) {
                    int index0 = lane + bits(ds_pattern, 1, 0);
                    panic_if(index0 >= NumVecElemPerVecReg, "%s: index0 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index0);
                    // Inactive source lanes contribute 0, not stale data.
                    vdst[lane]
                        = gpuDynInst->exec_mask[index0] ? data[index0]: 0;
                }
                if (gpuDynInst->exec_mask[lane + 1]) {
                    int index1 = lane + bits(ds_pattern, 3, 2);
                    panic_if(index1 >= NumVecElemPerVecReg, "%s: index1 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index1);
                    vdst[lane + 1]
                        = gpuDynInst->exec_mask[index1] ? data[index1]: 0;
                }
                if (gpuDynInst->exec_mask[lane + 2]) {
                    int index2 = lane + bits(ds_pattern, 5, 4);
                    panic_if(index2 >= NumVecElemPerVecReg, "%s: index2 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index2);
                    vdst[lane + 2]
                        = gpuDynInst->exec_mask[index2] ? data[index2]: 0;
                }
                if (gpuDynInst->exec_mask[lane + 3]) {
                    int index3 = lane + bits(ds_pattern, 7, 6);
                    panic_if(index3 >= NumVecElemPerVecReg, "%s: index3 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index3);
                    vdst[lane + 3]
                        = gpuDynInst->exec_mask[index3] ? data[index3]: 0;
                }
            }
        } else {
            // Bit Mode
            int and_mask = bits(ds_pattern, 4, 0);
            int or_mask = bits(ds_pattern, 9, 5);
            int xor_mask = bits(ds_pattern, 14, 10);
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    int index = (((lane & and_mask) | or_mask) ^ xor_mask);
                    // Adjust for the next 32 lanes.
                    if (lane > 31) {
                        index += 32;
                    }
                    panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is "
                             "out of bounds.\n", gpuDynInst->disassemble(),
                             index);
                    vdst[lane]
                        = gpuDynInst->exec_mask[index] ? data[index] : 0;
                }
            }
        }

        vdst.write();

        /**
         * This is needed because we treat this instruction as a load
         * but it's not an actual memory request.
         * Without this, the destination register never gets marked as
         * free, leading to a possible deadlock
         */
        wf->computeUnit->vrf[wf->simdId]->
            scheduleWriteOperandsFromLoad(wf, gpuDynInst);
    } // execute
    // --- Inst_DS__DS_PERMUTE_B32 class methods ---

    Inst_DS__DS_PERMUTE_B32::Inst_DS__DS_PERMUTE_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_permute_b32")
    {
        setFlag(MemoryRef);
        /**
         * While this operation doesn't actually use DS storage we classify
         * it as a load here because it does a writeback to a VGPR, which
         * fits in better with the LDS pipeline logic.
         */
        setFlag(Load);
    } // Inst_DS__DS_PERMUTE_B32

    Inst_DS__DS_PERMUTE_B32::~Inst_DS__DS_PERMUTE_B32()
    {
    } // ~Inst_DS__DS_PERMUTE_B32

    // Forward permute. Scatter: each active lane pushes its DATA0 value
    // to the destination lane selected by its address operand.
    void
    Inst_DS__DS_PERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()
                                ->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        addr.read();
        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                /**
                 * One of the offset fields can be used for the index.
                 * It is assumed OFFSET0 would be used, as OFFSET1 is
                 * typically only used for DS ops that operate on two
                 * disparate pieces of data.
                 */
                assert(!instData.OFFSET1);
                /**
                 * The address provided is a byte address, but VGPRs are
                 * 4 bytes, so we must divide by 4 to get the actual VGPR
                 * index. Additionally, the index is calculated modulo the
                 * WF size, 64 in this case, so we simply extract bits 7-2.
                 */
                int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
                panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
                         "of bounds.\n", gpuDynInst->disassemble(), index);
                /**
                 * If the shuffled index corresponds to a lane that is
                 * inactive then this instruction writes a 0 to the active
                 * lane in VDST.
                 */
                if (wf->execMask(index)) {
                    vdst[index] = data[lane];
                } else {
                    vdst[index] = 0;
                }
            }
        }

        vdst.write();

        // No LDS request is ever issued, so undo the issue-time
        // bookkeeping for this instruction.
        wf->decLGKMInstsIssued();
        wf->rdLmReqsInPipe--;
        wf->validateRequestCounters();

        /**
         * This is needed because we treat this instruction as a load
         * but it's not an actual memory request.
         * Without this, the destination register never gets marked as
         * free, leading to a possible deadlock
         */
        wf->computeUnit->vrf[wf->simdId]->
            scheduleWriteOperandsFromLoad(wf, gpuDynInst);
    } // execute
    // --- Inst_DS__DS_BPERMUTE_B32 class methods ---

    Inst_DS__DS_BPERMUTE_B32::Inst_DS__DS_BPERMUTE_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_bpermute_b32")
    {
        setFlag(MemoryRef);
        /**
         * While this operation doesn't actually use DS storage we classify
         * it as a load here because it does a writeback to a VGPR, which
         * fits in better with the LDS pipeline logic.
         */
        setFlag(Load);
    } // Inst_DS__DS_BPERMUTE_B32

    Inst_DS__DS_BPERMUTE_B32::~Inst_DS__DS_BPERMUTE_B32()
    {
    } // ~Inst_DS__DS_BPERMUTE_B32

    // Backward permute.
- void - Inst_DS__DS_BPERMUTE_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit() - ->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data(gpuDynInst, extData.DATA0); - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - addr.read(); - data.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - /** - * One of the offset fields can be used for the index. - * It is assumed OFFSET0 would be used, as OFFSET1 is - * typically only used for DS ops that operate on two - * disparate pieces of data. - */ - assert(!instData.OFFSET1); - /** - * The address provided is a byte address, but VGPRs are - * 4 bytes, so we must divide by 4 to get the actual VGPR - * index. Additionally, the index is calculated modulo the - * WF size, 64 in this case, so we simply extract bits 7-2. - */ - int index = bits(addr[lane] + instData.OFFSET0, 7, 2); - panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out " - "of bounds.\n", gpuDynInst->disassemble(), index); - /** - * If the shuffled index corresponds to a lane that is - * inactive then this instruction writes a 0 to the active - * lane in VDST. - */ - if (wf->execMask(index)) { - vdst[lane] = data[index]; - } else { - vdst[lane] = 0; - } - } - } - - vdst.write(); - - wf->decLGKMInstsIssued(); - wf->rdLmReqsInPipe--; - wf->validateRequestCounters(); - - /** - * This is needed because we treat this instruction as a load - * but it's not an actual memory request. 
- * Without this, the destination register never gets marked as - * free, leading to a possible deadlock - */ - wf->computeUnit->vrf[wf->simdId]-> - scheduleWriteOperandsFromLoad(wf, gpuDynInst); - } // execute - - // --- Inst_DS__DS_ADD_U64 class methods --- - - Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_add_u64") - { - } // Inst_DS__DS_ADD_U64 - - Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64() - { - } // ~Inst_DS__DS_ADD_U64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_sub_u64") - { - } // Inst_DS__DS_SUB_U64 - - Inst_DS__DS_SUB_U64::~Inst_DS__DS_SUB_U64() - { - } // ~Inst_DS__DS_SUB_U64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_SUB_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_RSUB_U64::Inst_DS__DS_RSUB_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_rsub_u64") - { - } // Inst_DS__DS_RSUB_U64 - - Inst_DS__DS_RSUB_U64::~Inst_DS__DS_RSUB_U64() - { - } // ~Inst_DS__DS_RSUB_U64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA - MEM[ADDR]; - // RETURN_DATA = tmp. - // Subtraction with reversed operands. - void - Inst_DS__DS_RSUB_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_INC_U64::Inst_DS__DS_INC_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_inc_u64") - { - } // Inst_DS__DS_INC_U64 - - Inst_DS__DS_INC_U64::~Inst_DS__DS_INC_U64() - { - } // ~Inst_DS__DS_INC_U64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_DS__DS_INC_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_DEC_U64::Inst_DS__DS_DEC_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_dec_u64") - { - } // Inst_DS__DS_DEC_U64 - - Inst_DS__DS_DEC_U64::~Inst_DS__DS_DEC_U64() - { - } // ~Inst_DS__DS_DEC_U64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 - // (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_DEC_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MIN_I64::Inst_DS__DS_MIN_I64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_i64") - { - } // Inst_DS__DS_MIN_I64 - - Inst_DS__DS_MIN_I64::~Inst_DS__DS_MIN_I64() - { - } // ~Inst_DS__DS_MIN_I64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MIN_I64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MAX_I64::Inst_DS__DS_MAX_I64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_i64") - { - } // Inst_DS__DS_MAX_I64 - - Inst_DS__DS_MAX_I64::~Inst_DS__DS_MAX_I64() - { - } // ~Inst_DS__DS_MAX_I64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MAX_I64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MIN_U64::Inst_DS__DS_MIN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_u64") - { - } // Inst_DS__DS_MIN_U64 - - Inst_DS__DS_MIN_U64::~Inst_DS__DS_MIN_U64() - { - } // ~Inst_DS__DS_MIN_U64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_DS__DS_MIN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MAX_U64::Inst_DS__DS_MAX_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_u64") - { - } // Inst_DS__DS_MAX_U64 - - Inst_DS__DS_MAX_U64::~Inst_DS__DS_MAX_U64() - { - } // ~Inst_DS__DS_MAX_U64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MAX_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_AND_B64::Inst_DS__DS_AND_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_and_b64") - { - } // Inst_DS__DS_AND_B64 - - Inst_DS__DS_AND_B64::~Inst_DS__DS_AND_B64() - { - } // ~Inst_DS__DS_AND_B64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_AND_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_OR_B64::Inst_DS__DS_OR_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_or_b64") - { - } // Inst_DS__DS_OR_B64 - - Inst_DS__DS_OR_B64::~Inst_DS__DS_OR_B64() - { - } // ~Inst_DS__DS_OR_B64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_OR_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_XOR_B64::Inst_DS__DS_XOR_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_xor_b64") - { - } // Inst_DS__DS_XOR_B64 - - Inst_DS__DS_XOR_B64::~Inst_DS__DS_XOR_B64() - { - } // ~Inst_DS__DS_XOR_B64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_XOR_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MSKOR_B64::Inst_DS__DS_MSKOR_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_mskor_b64") - { - } // Inst_DS__DS_MSKOR_B64 - - Inst_DS__DS_MSKOR_B64::~Inst_DS__DS_MSKOR_B64() - { - } // ~Inst_DS__DS_MSKOR_B64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; - // RETURN_DATA = tmp. 
- void - Inst_DS__DS_MSKOR_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_WRITE_B64::Inst_DS__DS_WRITE_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_b64") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_B64 - - Inst_DS__DS_WRITE_B64::~Inst_DS__DS_WRITE_B64() - { - } // ~Inst_DS__DS_WRITE_B64 - - // MEM[ADDR] = DATA. - // Write qword. - void - Inst_DS__DS_WRITE_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data(gpuDynInst, extData.DATA0); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } - - void - Inst_DS__DS_WRITE_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemWrite(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_WRITE_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_DS__DS_WRITE2_B64::Inst_DS__DS_WRITE2_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write2_b64") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE2_B64 - - Inst_DS__DS_WRITE2_B64::~Inst_DS__DS_WRITE2_B64() - { - } // ~Inst_DS__DS_WRITE2_B64 - - // MEM[ADDR_BASE + OFFSET0 * 8] = DATA; - // MEM[ADDR_BASE + OFFSET1 * 8] = DATA2. - // Write 2 qwords. 
- void - Inst_DS__DS_WRITE2_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data0(gpuDynInst, extData.DATA0); - ConstVecOperandU64 data1(gpuDynInst, extData.DATA1); - - addr.read(); - data0.read(); - data1.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 2] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } - - void - Inst_DS__DS_WRITE2_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0 * 8; - Addr offset1 = instData.OFFSET1 * 8; - - initDualMemWrite(gpuDynInst, offset0, offset1); - } - - void - Inst_DS__DS_WRITE2_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_DS__DS_WRITE2ST64_B64::Inst_DS__DS_WRITE2ST64_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write2st64_b64") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE2ST64_B64 - - Inst_DS__DS_WRITE2ST64_B64::~Inst_DS__DS_WRITE2ST64_B64() - { - } // ~Inst_DS__DS_WRITE2ST64_B64 - - // MEM[ADDR_BASE + OFFSET0 * 8 * 64] = DATA; - // MEM[ADDR_BASE + OFFSET1 * 8 * 64] = DATA2; - // Write 2 qwords. - void - Inst_DS__DS_WRITE2ST64_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_CMPST_B64::Inst_DS__DS_CMPST_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_b64") - { - } // Inst_DS__DS_CMPST_B64 - - Inst_DS__DS_CMPST_B64::~Inst_DS__DS_CMPST_B64() - { - } // ~Inst_DS__DS_CMPST_B64 - - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? 
src : tmp; - // RETURN_DATA[0] = tmp. - // Compare and store. - void - Inst_DS__DS_CMPST_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_CMPST_F64::Inst_DS__DS_CMPST_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_f64") - { - setFlag(F64); - } // Inst_DS__DS_CMPST_F64 - - Inst_DS__DS_CMPST_F64::~Inst_DS__DS_CMPST_F64() - { - } // ~Inst_DS__DS_CMPST_F64 - - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - void - Inst_DS__DS_CMPST_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MIN_F64::Inst_DS__DS_MIN_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_f64") - { - setFlag(F64); - } // Inst_DS__DS_MIN_F64 - - Inst_DS__DS_MIN_F64::~Inst_DS__DS_MIN_F64() - { - } // ~Inst_DS__DS_MIN_F64 - - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (cmp < tmp) ? src : tmp. - void - Inst_DS__DS_MIN_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MAX_F64::Inst_DS__DS_MAX_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_f64") - { - setFlag(F64); - } // Inst_DS__DS_MAX_F64 - - Inst_DS__DS_MAX_F64::~Inst_DS__DS_MAX_F64() - { - } // ~Inst_DS__DS_MAX_F64 - - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (tmp > cmp) ? src : tmp. - void - Inst_DS__DS_MAX_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_ADD_RTN_U64::Inst_DS__DS_ADD_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_add_rtn_u64") - { - } // Inst_DS__DS_ADD_RTN_U64 - - Inst_DS__DS_ADD_RTN_U64::~Inst_DS__DS_ADD_RTN_U64() - { - } // ~Inst_DS__DS_ADD_RTN_U64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_DS__DS_ADD_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_SUB_RTN_U64::Inst_DS__DS_SUB_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_sub_rtn_u64") - { - } // Inst_DS__DS_SUB_RTN_U64 - - Inst_DS__DS_SUB_RTN_U64::~Inst_DS__DS_SUB_RTN_U64() - { - } // ~Inst_DS__DS_SUB_RTN_U64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_SUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_RSUB_RTN_U64::Inst_DS__DS_RSUB_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_rsub_rtn_u64") - { - } // Inst_DS__DS_RSUB_RTN_U64 - - Inst_DS__DS_RSUB_RTN_U64::~Inst_DS__DS_RSUB_RTN_U64() - { - } // ~Inst_DS__DS_RSUB_RTN_U64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA - MEM[ADDR]; - // RETURN_DATA = tmp. - // Subtraction with reversed operands. - void - Inst_DS__DS_RSUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_INC_RTN_U64::Inst_DS__DS_INC_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_inc_rtn_u64") - { - } // Inst_DS__DS_INC_RTN_U64 - - Inst_DS__DS_INC_RTN_U64::~Inst_DS__DS_INC_RTN_U64() - { - } // ~Inst_DS__DS_INC_RTN_U64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_INC_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_DEC_RTN_U64::Inst_DS__DS_DEC_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_dec_rtn_u64") - { - } // Inst_DS__DS_DEC_RTN_U64 - - Inst_DS__DS_DEC_RTN_U64::~Inst_DS__DS_DEC_RTN_U64() - { - } // ~Inst_DS__DS_DEC_RTN_U64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 - // (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_DS__DS_DEC_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MIN_RTN_I64::Inst_DS__DS_MIN_RTN_I64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_rtn_i64") - { - } // Inst_DS__DS_MIN_RTN_I64 - - Inst_DS__DS_MIN_RTN_I64::~Inst_DS__DS_MIN_RTN_I64() - { - } // ~Inst_DS__DS_MIN_RTN_I64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MIN_RTN_I64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MAX_RTN_I64::Inst_DS__DS_MAX_RTN_I64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_rtn_i64") - { - } // Inst_DS__DS_MAX_RTN_I64 - - Inst_DS__DS_MAX_RTN_I64::~Inst_DS__DS_MAX_RTN_I64() - { - } // ~Inst_DS__DS_MAX_RTN_I64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MAX_RTN_I64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MIN_RTN_U64::Inst_DS__DS_MIN_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_rtn_u64") - { - } // Inst_DS__DS_MIN_RTN_U64 - - Inst_DS__DS_MIN_RTN_U64::~Inst_DS__DS_MIN_RTN_U64() - { - } // ~Inst_DS__DS_MIN_RTN_U64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MIN_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MAX_RTN_U64::Inst_DS__DS_MAX_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_rtn_u64") - { - } // Inst_DS__DS_MAX_RTN_U64 - - Inst_DS__DS_MAX_RTN_U64::~Inst_DS__DS_MAX_RTN_U64() - { - } // ~Inst_DS__DS_MAX_RTN_U64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_DS__DS_MAX_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_AND_RTN_B64::Inst_DS__DS_AND_RTN_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_and_rtn_b64") - { - } // Inst_DS__DS_AND_RTN_B64 - - Inst_DS__DS_AND_RTN_B64::~Inst_DS__DS_AND_RTN_B64() - { - } // ~Inst_DS__DS_AND_RTN_B64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_AND_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_OR_RTN_B64::Inst_DS__DS_OR_RTN_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_or_rtn_b64") - { - } // Inst_DS__DS_OR_RTN_B64 - - Inst_DS__DS_OR_RTN_B64::~Inst_DS__DS_OR_RTN_B64() - { - } // ~Inst_DS__DS_OR_RTN_B64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_OR_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_XOR_RTN_B64::Inst_DS__DS_XOR_RTN_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_xor_rtn_b64") - { - } // Inst_DS__DS_XOR_RTN_B64 - - Inst_DS__DS_XOR_RTN_B64::~Inst_DS__DS_XOR_RTN_B64() - { - } // ~Inst_DS__DS_XOR_RTN_B64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_XOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MSKOR_RTN_B64::Inst_DS__DS_MSKOR_RTN_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_mskor_rtn_b64") - { - } // Inst_DS__DS_MSKOR_RTN_B64 - - Inst_DS__DS_MSKOR_RTN_B64::~Inst_DS__DS_MSKOR_RTN_B64() - { - } // ~Inst_DS__DS_MSKOR_RTN_B64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; - // RETURN_DATA = tmp. - // Masked dword OR, D0 contains the mask and D1 contains the new value. 
- void - Inst_DS__DS_MSKOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_WRXCHG_RTN_B64::Inst_DS__DS_WRXCHG_RTN_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_wrxchg_rtn_b64") - { - } // Inst_DS__DS_WRXCHG_RTN_B64 - - Inst_DS__DS_WRXCHG_RTN_B64::~Inst_DS__DS_WRXCHG_RTN_B64() - { - } // ~Inst_DS__DS_WRXCHG_RTN_B64 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA; - // RETURN_DATA = tmp. - // Write-exchange operation. - void - Inst_DS__DS_WRXCHG_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_WRXCHG2_RTN_B64::Inst_DS__DS_WRXCHG2_RTN_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_wrxchg2_rtn_b64") - { - } // Inst_DS__DS_WRXCHG2_RTN_B64 - - Inst_DS__DS_WRXCHG2_RTN_B64::~Inst_DS__DS_WRXCHG2_RTN_B64() - { - } // ~Inst_DS__DS_WRXCHG2_RTN_B64 - - // Write-exchange 2 separate qwords. - void - Inst_DS__DS_WRXCHG2_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_WRXCHG2ST64_RTN_B64::Inst_DS__DS_WRXCHG2ST64_RTN_B64( - InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b64") - { - } // Inst_DS__DS_WRXCHG2ST64_RTN_B64 - - Inst_DS__DS_WRXCHG2ST64_RTN_B64::~Inst_DS__DS_WRXCHG2ST64_RTN_B64() - { - } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B64 - - // Write-exchange 2 qwords with a stride of 64 qwords. - void - Inst_DS__DS_WRXCHG2ST64_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_CMPST_RTN_B64::Inst_DS__DS_CMPST_RTN_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_rtn_b64") - { - } // Inst_DS__DS_CMPST_RTN_B64 - - Inst_DS__DS_CMPST_RTN_B64::~Inst_DS__DS_CMPST_RTN_B64() - { - } // ~Inst_DS__DS_CMPST_RTN_B64 - - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - // Compare and store. 
- void - Inst_DS__DS_CMPST_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_CMPST_RTN_F64::Inst_DS__DS_CMPST_RTN_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_rtn_f64") - { - setFlag(F64); - } // Inst_DS__DS_CMPST_RTN_F64 - - Inst_DS__DS_CMPST_RTN_F64::~Inst_DS__DS_CMPST_RTN_F64() - { - } // ~Inst_DS__DS_CMPST_RTN_F64 - - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - void - Inst_DS__DS_CMPST_RTN_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MIN_RTN_F64::Inst_DS__DS_MIN_RTN_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_rtn_f64") - { - setFlag(F64); - } // Inst_DS__DS_MIN_RTN_F64 - - Inst_DS__DS_MIN_RTN_F64::~Inst_DS__DS_MIN_RTN_F64() - { - } // ~Inst_DS__DS_MIN_RTN_F64 - - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (cmp < tmp) ? src : tmp. - void - Inst_DS__DS_MIN_RTN_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MAX_RTN_F64::Inst_DS__DS_MAX_RTN_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_rtn_f64") - { - setFlag(F64); - } // Inst_DS__DS_MAX_RTN_F64 - - Inst_DS__DS_MAX_RTN_F64::~Inst_DS__DS_MAX_RTN_F64() - { - } // ~Inst_DS__DS_MAX_RTN_F64 - - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (tmp > cmp) ? src : tmp. - void - Inst_DS__DS_MAX_RTN_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_READ_B64::Inst_DS__DS_READ_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_b64") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_B64 - - Inst_DS__DS_READ_B64::~Inst_DS__DS_READ_B64() - { - } // ~Inst_DS__DS_READ_B64 - - // RETURN_DATA = MEM[ADDR]. - // Read 1 qword. 
- void - Inst_DS__DS_READ_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } - - void - Inst_DS__DS_READ_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU64 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } // completeAcc - - Inst_DS__DS_READ2_B64::Inst_DS__DS_READ2_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read2_b64") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ2_B64 - - Inst_DS__DS_READ2_B64::~Inst_DS__DS_READ2_B64() - { - } // ~Inst_DS__DS_READ2_B64 - - // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8]; - // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8]. - // Read 2 qwords. 
- void - Inst_DS__DS_READ2_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } - - void - Inst_DS__DS_READ2_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0 * 8; - Addr offset1 = instData.OFFSET1 * 8; - - initDualMemRead(gpuDynInst, offset0, offset1); - } // initiateAcc - - void - Inst_DS__DS_READ2_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU64 vdst0(gpuDynInst, extData.VDST); - VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]; - } - } - - vdst0.write(); - vdst1.write(); - } // completeAcc - - Inst_DS__DS_READ2ST64_B64::Inst_DS__DS_READ2ST64_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read2st64_b64") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ2ST64_B64 - - Inst_DS__DS_READ2ST64_B64::~Inst_DS__DS_READ2ST64_B64() - { - } // ~Inst_DS__DS_READ2ST64_B64 - - // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8 * 64]; - // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8 * 64]. - // Read 2 qwords. 
- void - Inst_DS__DS_READ2ST64_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } - - void - Inst_DS__DS_READ2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = (instData.OFFSET0 * 8 * 64); - Addr offset1 = (instData.OFFSET1 * 8 * 64); - - initDualMemRead(gpuDynInst, offset0, offset1); - } - - void - Inst_DS__DS_READ2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU64 vdst0(gpuDynInst, extData.VDST); - VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]; - } - } - - vdst0.write(); - vdst1.write(); - } - - Inst_DS__DS_CONDXCHG32_RTN_B64::Inst_DS__DS_CONDXCHG32_RTN_B64( - InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_condxchg32_rtn_b64") - { - } // Inst_DS__DS_CONDXCHG32_RTN_B64 - - Inst_DS__DS_CONDXCHG32_RTN_B64::~Inst_DS__DS_CONDXCHG32_RTN_B64() - { - } // ~Inst_DS__DS_CONDXCHG32_RTN_B64 - - // Conditional write exchange. - void - Inst_DS__DS_CONDXCHG32_RTN_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_ADD_SRC2_U32::Inst_DS__DS_ADD_SRC2_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_add_src2_u32") - { - } // Inst_DS__DS_ADD_SRC2_U32 - - Inst_DS__DS_ADD_SRC2_U32::~Inst_DS__DS_ADD_SRC2_U32() - { - } // ~Inst_DS__DS_ADD_SRC2_U32 - - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[A] + MEM[B]. 
- void - Inst_DS__DS_ADD_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_SUB_SRC2_U32::Inst_DS__DS_SUB_SRC2_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_sub_src2_u32") - { - } // Inst_DS__DS_SUB_SRC2_U32 - - Inst_DS__DS_SUB_SRC2_U32::~Inst_DS__DS_SUB_SRC2_U32() - { - } // ~Inst_DS__DS_SUB_SRC2_U32 - - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[A] - MEM[B]. - void - Inst_DS__DS_SUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_RSUB_SRC2_U32::Inst_DS__DS_RSUB_SRC2_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_rsub_src2_u32") - { - } // Inst_DS__DS_RSUB_SRC2_U32 - - Inst_DS__DS_RSUB_SRC2_U32::~Inst_DS__DS_RSUB_SRC2_U32() - { - } // ~Inst_DS__DS_RSUB_SRC2_U32 - - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[B] - MEM[A]. - void - Inst_DS__DS_RSUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_INC_SRC2_U32::Inst_DS__DS_INC_SRC2_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_inc_src2_u32") - { - } // Inst_DS__DS_INC_SRC2_U32 - - Inst_DS__DS_INC_SRC2_U32::~Inst_DS__DS_INC_SRC2_U32() - { - } // ~Inst_DS__DS_INC_SRC2_U32 - - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // {offset1[6],offset1[6:0],offset0}); - // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1). - void - Inst_DS__DS_INC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_DEC_SRC2_U32::Inst_DS__DS_DEC_SRC2_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_dec_src2_u32") - { - } // Inst_DS__DS_DEC_SRC2_U32 - - Inst_DS__DS_DEC_SRC2_U32::~Inst_DS__DS_DEC_SRC2_U32() - { - } // ~Inst_DS__DS_DEC_SRC2_U32 - - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // {offset1[6],offset1[6:0],offset0}); - // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1). 
- // Uint decrement. - void - Inst_DS__DS_DEC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MIN_SRC2_I32::Inst_DS__DS_MIN_SRC2_I32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_src2_i32") - { - } // Inst_DS__DS_MIN_SRC2_I32 - - Inst_DS__DS_MIN_SRC2_I32::~Inst_DS__DS_MIN_SRC2_I32() - { - } // ~Inst_DS__DS_MIN_SRC2_I32 - - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // {offset1[6],offset1[6:0],offset0}); - // MEM[A] = min(MEM[A], MEM[B]). - void - Inst_DS__DS_MIN_SRC2_I32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MAX_SRC2_I32::Inst_DS__DS_MAX_SRC2_I32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_src2_i32") - { - } // Inst_DS__DS_MAX_SRC2_I32 - - Inst_DS__DS_MAX_SRC2_I32::~Inst_DS__DS_MAX_SRC2_I32() - { - } // ~Inst_DS__DS_MAX_SRC2_I32 - - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // {offset1[6],offset1[6:0],offset0}); - // MEM[A] = max(MEM[A], MEM[B]). - void - Inst_DS__DS_MAX_SRC2_I32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MIN_SRC2_U32::Inst_DS__DS_MIN_SRC2_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_src2_u32") - { - } // Inst_DS__DS_MIN_SRC2_U32 - - Inst_DS__DS_MIN_SRC2_U32::~Inst_DS__DS_MIN_SRC2_U32() - { - } // ~Inst_DS__DS_MIN_SRC2_U32 - - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // {offset1[6],offset1[6:0],offset0}); - // MEM[A] = min(MEM[A], MEM[B]). - void - Inst_DS__DS_MIN_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MAX_SRC2_U32::Inst_DS__DS_MAX_SRC2_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_src2_u32") - { - } // Inst_DS__DS_MAX_SRC2_U32 - - Inst_DS__DS_MAX_SRC2_U32::~Inst_DS__DS_MAX_SRC2_U32() - { - } // ~Inst_DS__DS_MAX_SRC2_U32 - - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // {offset1[6],offset1[6:0],offset0}); - // MEM[A] = max(MEM[A], MEM[B]). 
    // Remaining 32-bit ds_*_src2_* instructions: unsigned max, the B32
    // bitwise ops, a dword write, and the F32 min/max/add variants.  All
    // are unimplemented in gem5 -- each execute() aborts the simulation
    // via panicUnimplemented().

    void
    Inst_DS__DS_MAX_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_AND_SRC2_B32::Inst_DS__DS_AND_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_src2_b32")
    {
    } // Inst_DS__DS_AND_SRC2_B32

    Inst_DS__DS_AND_SRC2_B32::~Inst_DS__DS_AND_SRC2_B32()
    {
    } // ~Inst_DS__DS_AND_SRC2_B32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] & MEM[B].
    void
    Inst_DS__DS_AND_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_OR_SRC2_B32::Inst_DS__DS_OR_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_src2_b32")
    {
    } // Inst_DS__DS_OR_SRC2_B32

    Inst_DS__DS_OR_SRC2_B32::~Inst_DS__DS_OR_SRC2_B32()
    {
    } // ~Inst_DS__DS_OR_SRC2_B32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] | MEM[B].
    void
    Inst_DS__DS_OR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_XOR_SRC2_B32::Inst_DS__DS_XOR_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_src2_b32")
    {
    } // Inst_DS__DS_XOR_SRC2_B32

    Inst_DS__DS_XOR_SRC2_B32::~Inst_DS__DS_XOR_SRC2_B32()
    {
    } // ~Inst_DS__DS_XOR_SRC2_B32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] ^ MEM[B].
    void
    Inst_DS__DS_XOR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRITE_SRC2_B32::Inst_DS__DS_WRITE_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_src2_b32")
    {
        // Flagged as a store even though execute() is unimplemented, so
        // the decoder/pipeline classify the op correctly.
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_SRC2_B32

    Inst_DS__DS_WRITE_SRC2_B32::~Inst_DS__DS_WRITE_SRC2_B32()
    {
    } // ~Inst_DS__DS_WRITE_SRC2_B32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B].
    // Write dword.
    void
    Inst_DS__DS_WRITE_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_F32::Inst_DS__DS_MIN_SRC2_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MIN_SRC2_F32

    Inst_DS__DS_MIN_SRC2_F32::~Inst_DS__DS_MIN_SRC2_F32()
    {
    } // ~Inst_DS__DS_MIN_SRC2_F32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
    void
    Inst_DS__DS_MIN_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_F32::Inst_DS__DS_MAX_SRC2_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MAX_SRC2_F32

    Inst_DS__DS_MAX_SRC2_F32::~Inst_DS__DS_MAX_SRC2_F32()
    {
    } // ~Inst_DS__DS_MAX_SRC2_F32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
    void
    Inst_DS__DS_MAX_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_ADD_SRC2_F32::Inst_DS__DS_ADD_SRC2_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_src2_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_ADD_SRC2_F32

    Inst_DS__DS_ADD_SRC2_F32::~Inst_DS__DS_ADD_SRC2_F32()
    {
    } // ~Inst_DS__DS_ADD_SRC2_F32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B] + MEM[A].
    // ds_add_src2_f32 execute, the GWS (global wave sync) family, the
    // consume/append/ordered_count LDS pointer ops, and the first of the
    // 64-bit ds_*_src2_* instructions.  All unimplemented in gem5; every
    // execute() below aborts the simulation via panicUnimplemented().

    void
    Inst_DS__DS_ADD_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_RELEASE_ALL::Inst_DS__DS_GWS_SEMA_RELEASE_ALL(
          InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_release_all")
    {
    } // Inst_DS__DS_GWS_SEMA_RELEASE_ALL

    Inst_DS__DS_GWS_SEMA_RELEASE_ALL::~Inst_DS__DS_GWS_SEMA_RELEASE_ALL()
    {
    } // ~Inst_DS__DS_GWS_SEMA_RELEASE_ALL

    void
    Inst_DS__DS_GWS_SEMA_RELEASE_ALL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_INIT::Inst_DS__DS_GWS_INIT(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_init")
    {
    } // Inst_DS__DS_GWS_INIT

    Inst_DS__DS_GWS_INIT::~Inst_DS__DS_GWS_INIT()
    {
    } // ~Inst_DS__DS_GWS_INIT

    void
    Inst_DS__DS_GWS_INIT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_V::Inst_DS__DS_GWS_SEMA_V(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_v")
    {
    } // Inst_DS__DS_GWS_SEMA_V

    Inst_DS__DS_GWS_SEMA_V::~Inst_DS__DS_GWS_SEMA_V()
    {
    } // ~Inst_DS__DS_GWS_SEMA_V

    void
    Inst_DS__DS_GWS_SEMA_V::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_BR::Inst_DS__DS_GWS_SEMA_BR(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_br")
    {
    } // Inst_DS__DS_GWS_SEMA_BR

    Inst_DS__DS_GWS_SEMA_BR::~Inst_DS__DS_GWS_SEMA_BR()
    {
    } // ~Inst_DS__DS_GWS_SEMA_BR

    void
    Inst_DS__DS_GWS_SEMA_BR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_P::Inst_DS__DS_GWS_SEMA_P(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_p")
    {
    } // Inst_DS__DS_GWS_SEMA_P

    Inst_DS__DS_GWS_SEMA_P::~Inst_DS__DS_GWS_SEMA_P()
    {
    } // ~Inst_DS__DS_GWS_SEMA_P

    void
    Inst_DS__DS_GWS_SEMA_P::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_BARRIER::Inst_DS__DS_GWS_BARRIER(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_barrier")
    {
    } // Inst_DS__DS_GWS_BARRIER

    Inst_DS__DS_GWS_BARRIER::~Inst_DS__DS_GWS_BARRIER()
    {
    } // ~Inst_DS__DS_GWS_BARRIER

    void
    Inst_DS__DS_GWS_BARRIER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_CONSUME::Inst_DS__DS_CONSUME(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_consume")
    {
    } // Inst_DS__DS_CONSUME

    Inst_DS__DS_CONSUME::~Inst_DS__DS_CONSUME()
    {
    } // ~Inst_DS__DS_CONSUME

    void
    Inst_DS__DS_CONSUME::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_APPEND::Inst_DS__DS_APPEND(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_append")
    {
    } // Inst_DS__DS_APPEND

    Inst_DS__DS_APPEND::~Inst_DS__DS_APPEND()
    {
    } // ~Inst_DS__DS_APPEND

    void
    Inst_DS__DS_APPEND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_ORDERED_COUNT::Inst_DS__DS_ORDERED_COUNT(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_ordered_count")
    {
    } // Inst_DS__DS_ORDERED_COUNT

    Inst_DS__DS_ORDERED_COUNT::~Inst_DS__DS_ORDERED_COUNT()
    {
    } // ~Inst_DS__DS_ORDERED_COUNT

    void
    Inst_DS__DS_ORDERED_COUNT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_ADD_SRC2_U64::Inst_DS__DS_ADD_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_src2_u64")
    {
    } // Inst_DS__DS_ADD_SRC2_U64

    Inst_DS__DS_ADD_SRC2_U64::~Inst_DS__DS_ADD_SRC2_U64()
    {
    } // ~Inst_DS__DS_ADD_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] + MEM[B].
    void
    Inst_DS__DS_ADD_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_SUB_SRC2_U64::Inst_DS__DS_SUB_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_src2_u64")
    {
    } // Inst_DS__DS_SUB_SRC2_U64

    Inst_DS__DS_SUB_SRC2_U64::~Inst_DS__DS_SUB_SRC2_U64()
    {
    } // ~Inst_DS__DS_SUB_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] - MEM[B].
    // 64-bit ds_*_src2_* instructions (sub/rsub/inc/dec, signed and
    // unsigned min/max, the B64 bitwise ops, and the qword write).  As
    // with their 32-bit counterparts above, none are implemented: each
    // execute() aborts the simulation via panicUnimplemented().

    void
    Inst_DS__DS_SUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_RSUB_SRC2_U64::Inst_DS__DS_RSUB_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_src2_u64")
    {
    } // Inst_DS__DS_RSUB_SRC2_U64

    Inst_DS__DS_RSUB_SRC2_U64::~Inst_DS__DS_RSUB_SRC2_U64()
    {
    } // ~Inst_DS__DS_RSUB_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B] - MEM[A].
    void
    Inst_DS__DS_RSUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_INC_SRC2_U64::Inst_DS__DS_INC_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_src2_u64")
    {
    } // Inst_DS__DS_INC_SRC2_U64

    Inst_DS__DS_INC_SRC2_U64::~Inst_DS__DS_INC_SRC2_U64()
    {
    } // ~Inst_DS__DS_INC_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
    void
    Inst_DS__DS_INC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_DEC_SRC2_U64::Inst_DS__DS_DEC_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_src2_u64")
    {
    } // Inst_DS__DS_DEC_SRC2_U64

    Inst_DS__DS_DEC_SRC2_U64::~Inst_DS__DS_DEC_SRC2_U64()
    {
    } // ~Inst_DS__DS_DEC_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
    // Uint decrement.
    void
    Inst_DS__DS_DEC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_I64::Inst_DS__DS_MIN_SRC2_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_i64")
    {
    } // Inst_DS__DS_MIN_SRC2_I64

    Inst_DS__DS_MIN_SRC2_I64::~Inst_DS__DS_MIN_SRC2_I64()
    {
    } // ~Inst_DS__DS_MIN_SRC2_I64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_I64::Inst_DS__DS_MAX_SRC2_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_i64")
    {
    } // Inst_DS__DS_MAX_SRC2_I64

    Inst_DS__DS_MAX_SRC2_I64::~Inst_DS__DS_MAX_SRC2_I64()
    {
    } // ~Inst_DS__DS_MAX_SRC2_I64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_U64::Inst_DS__DS_MIN_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_u64")
    {
    } // Inst_DS__DS_MIN_SRC2_U64

    Inst_DS__DS_MIN_SRC2_U64::~Inst_DS__DS_MIN_SRC2_U64()
    {
    } // ~Inst_DS__DS_MIN_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_U64::Inst_DS__DS_MAX_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_u64")
    {
    } // Inst_DS__DS_MAX_SRC2_U64

    Inst_DS__DS_MAX_SRC2_U64::~Inst_DS__DS_MAX_SRC2_U64()
    {
    } // ~Inst_DS__DS_MAX_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_AND_SRC2_B64::Inst_DS__DS_AND_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_src2_b64")
    {
    } // Inst_DS__DS_AND_SRC2_B64

    Inst_DS__DS_AND_SRC2_B64::~Inst_DS__DS_AND_SRC2_B64()
    {
    } // ~Inst_DS__DS_AND_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] & MEM[B].
    void
    Inst_DS__DS_AND_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_OR_SRC2_B64::Inst_DS__DS_OR_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_src2_b64")
    {
    } // Inst_DS__DS_OR_SRC2_B64

    Inst_DS__DS_OR_SRC2_B64::~Inst_DS__DS_OR_SRC2_B64()
    {
    } // ~Inst_DS__DS_OR_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] | MEM[B].
    void
    Inst_DS__DS_OR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_XOR_SRC2_B64::Inst_DS__DS_XOR_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_src2_b64")
    {
    } // Inst_DS__DS_XOR_SRC2_B64

    Inst_DS__DS_XOR_SRC2_B64::~Inst_DS__DS_XOR_SRC2_B64()
    {
    } // ~Inst_DS__DS_XOR_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] ^ MEM[B].
    void
    Inst_DS__DS_XOR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRITE_SRC2_B64::Inst_DS__DS_WRITE_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_src2_b64")
    {
        // Flagged as a store even though execute() is unimplemented, so
        // the decoder/pipeline classify the op correctly.
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_SRC2_B64

    Inst_DS__DS_WRITE_SRC2_B64::~Inst_DS__DS_WRITE_SRC2_B64()
    {
    } // ~Inst_DS__DS_WRITE_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B].
    // Write qword.
- void - Inst_DS__DS_WRITE_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MIN_SRC2_F64::Inst_DS__DS_MIN_SRC2_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_src2_f64") - { - setFlag(F64); - } // Inst_DS__DS_MIN_SRC2_F64 - - Inst_DS__DS_MIN_SRC2_F64::~Inst_DS__DS_MIN_SRC2_F64() - { - } // ~Inst_DS__DS_MIN_SRC2_F64 - - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // {offset1[6],offset1[6:0],offset0}); - // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A]. - void - Inst_DS__DS_MIN_SRC2_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_MAX_SRC2_F64::Inst_DS__DS_MAX_SRC2_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_src2_f64") - { - setFlag(F64); - } // Inst_DS__DS_MAX_SRC2_F64 - - Inst_DS__DS_MAX_SRC2_F64::~Inst_DS__DS_MAX_SRC2_F64() - { - } // ~Inst_DS__DS_MAX_SRC2_F64 - - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // {offset1[6],offset1[6:0],offset0}); - // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A]. - void - Inst_DS__DS_MAX_SRC2_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_DS__DS_WRITE_B96::Inst_DS__DS_WRITE_B96(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_b96") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_B96 - - Inst_DS__DS_WRITE_B96::~Inst_DS__DS_WRITE_B96() - { - } // ~Inst_DS__DS_WRITE_B96 - - // {MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[95:0]. - // Tri-dword write. 
- void - Inst_DS__DS_WRITE_B96::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); - ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2); - - addr.read(); - data0.read(); - data1.read(); - data2.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } - - void - Inst_DS__DS_WRITE_B96::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemWrite<3>(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_WRITE_B96::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_DS__DS_WRITE_B128::Inst_DS__DS_WRITE_B128(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_b128") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_B128 - - Inst_DS__DS_WRITE_B128::~Inst_DS__DS_WRITE_B128() - { - } // ~Inst_DS__DS_WRITE_B128 - - // {MEM[ADDR + 12], MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[127:0]. - // Qword write. 
- void - Inst_DS__DS_WRITE_B128::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); - ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2); - ConstVecOperandU32 data3(gpuDynInst, extData.DATA0 + 3); - - addr.read(); - data0.read(); - data1.read(); - data2.read(); - data3.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 3] = data3[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } - - void - Inst_DS__DS_WRITE_B128::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemWrite<4>(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_WRITE_B128::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_DS__DS_READ_B96::Inst_DS__DS_READ_B96(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_b96") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_B96 - - Inst_DS__DS_READ_B96::~Inst_DS__DS_READ_B96() - { - } // ~Inst_DS__DS_READ_B96 - - // Tri-dword read. 
- void - Inst_DS__DS_READ_B96::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } - - void - Inst_DS__DS_READ_B96::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead<3>(gpuDynInst, offset); - } - - void - Inst_DS__DS_READ_B96::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2]; - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - } - - Inst_DS__DS_READ_B128::Inst_DS__DS_READ_B128(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_b128") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_B128 - - Inst_DS__DS_READ_B128::~Inst_DS__DS_READ_B128() - { - } // ~Inst_DS__DS_READ_B128 - - // Qword read. 
- void - Inst_DS__DS_READ_B128::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } - - void - Inst_DS__DS_READ_B128::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead<4>(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_B128::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); - VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2]; - vdst3[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 3]; - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - vdst3.write(); - } // completeAcc - - Inst_MUBUF__BUFFER_LOAD_FORMAT_X - ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_format_x") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_FORMAT_X - - Inst_MUBUF__BUFFER_LOAD_FORMAT_X::~Inst_MUBUF__BUFFER_LOAD_FORMAT_X() - { - } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X - - // Untyped buffer load 1 dword with format conversion. 
    // MUBUF buffer_load_format_* / buffer_store_format_* instructions
    // (x/xy/xyz/xyzw).  None are implemented in gem5: execute() aborts
    // the simulation via panicUnimplemented(), and the initiateAcc /
    // completeAcc hooks are empty placeholders that are never reached.

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY

    // Untyped buffer load 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ

    // Untyped buffer load 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW

    // Untyped buffer load 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_X
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_X

    Inst_MUBUF__BUFFER_STORE_FORMAT_X::~Inst_MUBUF__BUFFER_STORE_FORMAT_X()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_X

    // Untyped buffer store 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_XY
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XY

    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::~Inst_MUBUF__BUFFER_STORE_FORMAT_XY()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY

    // Untyped buffer store 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ

    // Untyped buffer store 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW

    // Untyped buffer store 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X

    // Untyped buffer load 1 dword with format conversion.
- void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY - ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_format_d16_xy") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY - ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY() - { - } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY - - // Untyped buffer load 2 dwords with format conversion. - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ - ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyz") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ - ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ() - { - } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ - - // Untyped buffer load 3 dwords with format conversion. 
- void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW - ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyzw") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW - ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW() - { - } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW - - // Untyped buffer load 4 dwords with format conversion. - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X - ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_format_d16_x") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X - ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X() - { - } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X - - // Untyped buffer store 1 dword with format conversion. 
- void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY - ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_format_d16_xy") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY - ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY() - { - } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY - - // Untyped buffer store 2 dwords with format conversion. - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ - ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyz") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ - ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ() - { - } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ - - // Untyped buffer store 3 dwords with format conversion. 
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        // Not modeled: executing this opcode is a fatal simulation error.
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW

    // Untyped buffer store 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        // Not modeled: executing this opcode is a fatal simulation error.
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_UBYTE
        ::Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_ubyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        // The LDS bit redirects the result to LDS instead of VGPRs.
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_UBYTE

    Inst_MUBUF__BUFFER_LOAD_UBYTE::~Inst_MUBUF__BUFFER_LOAD_UBYTE()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_UBYTE

    // Untyped buffer load unsigned byte (zero extend to VGPR destination).
- void - Inst_MUBUF__BUFFER_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - if (isLocalMem()) { - gpuDynInst->computeUnit()->localMemoryPipe. - issueRequest(gpuDynInst); - } else { - gpuDynInst->computeUnit()->globalMemoryPipe. 
- issueRequest(gpuDynInst); - } - } - - void - Inst_MUBUF__BUFFER_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDATA); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst[lane] = (VecElemU32)((reinterpret_cast( - gpuDynInst->d_data))[lane]); - } else { - vdst[lane] = 0; - } - } - } - - vdst.write(); - } - - - Inst_MUBUF__BUFFER_LOAD_SBYTE - ::Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_sbyte") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_SBYTE - - Inst_MUBUF__BUFFER_LOAD_SBYTE::~Inst_MUBUF__BUFFER_LOAD_SBYTE() - { - } // ~Inst_MUBUF__BUFFER_LOAD_SBYTE - - // Untyped buffer load signed byte (sign extend to VGPR destination). - void - Inst_MUBUF__BUFFER_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MUBUF__BUFFER_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_MUBUF__BUFFER_LOAD_USHORT - ::Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_ushort") - { - setFlag(MemoryRef); - setFlag(Load); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_LOAD_USHORT - - Inst_MUBUF__BUFFER_LOAD_USHORT::~Inst_MUBUF__BUFFER_LOAD_USHORT() - { - } // ~Inst_MUBUF__BUFFER_LOAD_USHORT - - // Untyped buffer load unsigned short (zero extend to VGPR destination). 
- void - Inst_MUBUF__BUFFER_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - if (isLocalMem()) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } - } - - void - Inst_MUBUF__BUFFER_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDATA); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst[lane] = (VecElemU32)((reinterpret_cast( - gpuDynInst->d_data))[lane]); - } else { - vdst[lane] = 0; - } - } - } - - vdst.write(); - } - - - Inst_MUBUF__BUFFER_LOAD_SSHORT - ::Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_sshort") - { - setFlag(MemoryRef); - 
setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_SSHORT - - Inst_MUBUF__BUFFER_LOAD_SSHORT::~Inst_MUBUF__BUFFER_LOAD_SSHORT() - { - } // ~Inst_MUBUF__BUFFER_LOAD_SSHORT - - // Untyped buffer load signed short (sign extend to VGPR destination). - void - Inst_MUBUF__BUFFER_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MUBUF__BUFFER_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_MUBUF__BUFFER_LOAD_DWORD - ::Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_dword") - { - setFlag(MemoryRef); - setFlag(Load); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_LOAD_DWORD - - Inst_MUBUF__BUFFER_LOAD_DWORD::~Inst_MUBUF__BUFFER_LOAD_DWORD() - { - } // ~Inst_MUBUF__BUFFER_LOAD_DWORD - - // Untyped buffer load dword. 
- void - Inst_MUBUF__BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - if (isLocalMem()) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } - } - - void - Inst_MUBUF__BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDATA); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } else { - vdst[lane] = 0; - } - } - } - - vdst.write(); - } // completeAcc - - Inst_MUBUF__BUFFER_LOAD_DWORDX2 - ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_dwordx2") - { - setFlag(MemoryRef); - 
setFlag(Load); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_LOAD_DWORDX2 - - Inst_MUBUF__BUFFER_LOAD_DWORDX2::~Inst_MUBUF__BUFFER_LOAD_DWORDX2() - { - } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX2 - - // Untyped buffer load 2 dwords. - void - Inst_MUBUF__BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - if (isLocalMem()) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<2>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDATA); - VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); - - for (int lane = 0; lane < NumVecElemPerVecReg; 
++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]; - } else { - vdst0[lane] = 0; - vdst1[lane] = 0; - } - } - } - - vdst0.write(); - vdst1.write(); - } // completeAcc - - Inst_MUBUF__BUFFER_LOAD_DWORDX3 - ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_dwordx3") - { - setFlag(MemoryRef); - setFlag(Load); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_LOAD_DWORDX3 - - Inst_MUBUF__BUFFER_LOAD_DWORDX3::~Inst_MUBUF__BUFFER_LOAD_DWORDX3() - { - } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX3 - - // Untyped buffer load 3 dwords. - void - Inst_MUBUF__BUFFER_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - if (isLocalMem()) { - gpuDynInst->computeUnit()->localMemoryPipe - 
.issueRequest(gpuDynInst); - } else { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<3>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDATA); - VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 2]; - } else { - vdst0[lane] = 0; - vdst1[lane] = 0; - vdst2[lane] = 0; - } - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - } // completeAcc - - Inst_MUBUF__BUFFER_LOAD_DWORDX4 - ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_dwordx4") - { - setFlag(MemoryRef); - setFlag(Load); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_LOAD_DWORDX4 - - Inst_MUBUF__BUFFER_LOAD_DWORDX4::~Inst_MUBUF__BUFFER_LOAD_DWORDX4() - { - } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX4 - - // Untyped buffer load 4 dwords. 
- void - Inst_MUBUF__BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - if (isLocalMem()) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<4>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDATA); - VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2); - VecOperandU32 vdst3(gpuDynInst, extData.VDATA + 3); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4]; - vdst1[lane] = (reinterpret_cast( - 
gpuDynInst->d_data))[lane * 4 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2]; - vdst3[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 3]; - } else { - vdst0[lane] = 0; - vdst1[lane] = 0; - vdst2[lane] = 0; - vdst3[lane] = 0; - } - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - vdst3.write(); - } // completeAcc - - Inst_MUBUF__BUFFER_STORE_BYTE - ::Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_byte") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_BYTE - - Inst_MUBUF__BUFFER_STORE_BYTE::~Inst_MUBUF__BUFFER_STORE_BYTE() - { - } // ~Inst_MUBUF__BUFFER_STORE_BYTE - - // Untyped buffer store byte. - void - Inst_MUBUF__BUFFER_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandI8 data(gpuDynInst, extData.VDATA); - - rsrcDesc.read(); - offset.read(); - data.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, 
- addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - if (isLocalMem()) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } - } - - void - Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_MUBUF__BUFFER_STORE_SHORT - ::Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_short") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_SHORT - - Inst_MUBUF__BUFFER_STORE_SHORT::~Inst_MUBUF__BUFFER_STORE_SHORT() - { - } // ~Inst_MUBUF__BUFFER_STORE_SHORT - - // Untyped buffer store short. 
- void - Inst_MUBUF__BUFFER_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandI16 data(gpuDynInst, extData.VDATA); - - rsrcDesc.read(); - offset.read(); - data.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - if (isLocalMem()) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } - } - - void - Inst_MUBUF__BUFFER_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_MUBUF__BUFFER_STORE_DWORD:: - Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_dword") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } 
else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_DWORD - - Inst_MUBUF__BUFFER_STORE_DWORD::~Inst_MUBUF__BUFFER_STORE_DWORD() - { - } // ~Inst_MUBUF__BUFFER_STORE_DWORD - - // Untyped buffer store dword. - void - Inst_MUBUF__BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandU32 data(gpuDynInst, extData.VDATA); - - rsrcDesc.read(); - offset.read(); - data.read(); - - int inst_offset = instData.OFFSET; - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - if (isLocalMem()) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } - } - - void - Inst_MUBUF__BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // 
completeAcc - - Inst_MUBUF__BUFFER_STORE_DWORDX2 - ::Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_dwordx2") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_DWORDX2 - - Inst_MUBUF__BUFFER_STORE_DWORDX2::~Inst_MUBUF__BUFFER_STORE_DWORDX2() - { - } // ~Inst_MUBUF__BUFFER_STORE_DWORDX2 - - // Untyped buffer store 2 dwords. - void - Inst_MUBUF__BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); - ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); - - rsrcDesc.read(); - offset.read(); - data0.read(); - data1.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - if (isLocalMem()) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if 
(gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane * 4] - = data0[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 1] - = data1[lane]; - } - } - } // execute - - void - Inst_MUBUF__BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<2>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_MUBUF__BUFFER_STORE_DWORDX3 - ::Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_dwordx3") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_DWORDX3 - - Inst_MUBUF__BUFFER_STORE_DWORDX3::~Inst_MUBUF__BUFFER_STORE_DWORDX3() - { - } // ~Inst_MUBUF__BUFFER_STORE_DWORDX3 - - // Untyped buffer store 3 dwords. - void - Inst_MUBUF__BUFFER_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); - ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2); - - rsrcDesc.read(); - offset.read(); - data0.read(); - data1.read(); - data2.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else 
if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - if (isLocalMem()) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane * 4] - = data0[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 1] - = data1[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 2] - = data2[lane]; - } - } - } // execute - - void - Inst_MUBUF__BUFFER_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<3>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_MUBUF__BUFFER_STORE_DWORDX4 - ::Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_dwordx4") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_DWORDX4 - - Inst_MUBUF__BUFFER_STORE_DWORDX4::~Inst_MUBUF__BUFFER_STORE_DWORDX4() - { - } // ~Inst_MUBUF__BUFFER_STORE_DWORDX4 - - // Untyped buffer store 4 dwords. 
- void - Inst_MUBUF__BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); - ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2); - ConstVecOperandU32 data3(gpuDynInst, extData.VDATA + 3); - - rsrcDesc.read(); - offset.read(); - data0.read(); - data1.read(); - data2.read(); - data3.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - if (isLocalMem()) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane * 4] - = data0[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 1] - = data1[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 2] - = data2[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 3] - = data3[lane]; - } - } - } // 
execute - - void - Inst_MUBUF__BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<4>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_MUBUF__BUFFER_STORE_LDS_DWORD - ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_lds_dword") - { - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_STORE_LDS_DWORD - - Inst_MUBUF__BUFFER_STORE_LDS_DWORD::~Inst_MUBUF__BUFFER_STORE_LDS_DWORD() - { - } // ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD - - // Store one DWORD from LDS memory to system memory without utilizing - // VGPRs. - void - Inst_MUBUF__BUFFER_STORE_LDS_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_WBINVL1::Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_wbinvl1") - { - setFlag(MemoryRef); - setFlag(GPUStaticInst::MemSync); - setFlag(GlobalSegment); - setFlag(MemSync); - } // Inst_MUBUF__BUFFER_WBINVL1 - - Inst_MUBUF__BUFFER_WBINVL1::~Inst_MUBUF__BUFFER_WBINVL1() - { - } // ~Inst_MUBUF__BUFFER_WBINVL1 - - // Write back and invalidate the shader L1. - // Always returns ACK to shader. - void - Inst_MUBUF__BUFFER_WBINVL1::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe. 
- issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_MUBUF__BUFFER_WBINVL1::initiateAcc(GPUDynInstPtr gpuDynInst) - { - injectGlobalMemFence(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_WBINVL1::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_MUBUF__BUFFER_WBINVL1_VOL - ::Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF*iFmt) - : Inst_MUBUF(iFmt, "buffer_wbinvl1_vol") { - /** - * This instruction is same as buffer_wbinvl1 instruction except this - * instruction only invalidate L1 shader line with MTYPE for system - * or group coherence. Since L1 do not differentiate between its cache - * lines, this instruction currently behaves (and implemented ) - * exactly like buffer_wbinvl1 instruction. - */ - setFlag(MemoryRef); - setFlag(GPUStaticInst::MemSync); - setFlag(GlobalSegment); - setFlag(MemSync); - } // Inst_MUBUF__BUFFER_WBINVL1_VOL - - Inst_MUBUF__BUFFER_WBINVL1_VOL::~Inst_MUBUF__BUFFER_WBINVL1_VOL() - { - } // ~Inst_MUBUF__BUFFER_WBINVL1_VOL - - // Write back and invalidate the shader L1 only for lines that are marked - // volatile. Always returns ACK to shader. - void - Inst_MUBUF__BUFFER_WBINVL1_VOL::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe. 
- issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - void - Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst) - { - injectGlobalMemFence(gpuDynInst); - } // initiateAcc - void - Inst_MUBUF__BUFFER_WBINVL1_VOL::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_MUBUF__BUFFER_ATOMIC_SWAP - ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_swap") - { - setFlag(AtomicExch); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } // if - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SWAP - - Inst_MUBUF__BUFFER_ATOMIC_SWAP::~Inst_MUBUF__BUFFER_ATOMIC_SWAP() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP - - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA; - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP - ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap") - { - setFlag(AtomicCAS); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP - - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP - - // tmp = MEM[ADDR]; - // src = DATA[0]; - // cmp = DATA[1]; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_ADD - ::Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_add") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } // if - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_ADD - - Inst_MUBUF__BUFFER_ATOMIC_ADD::~Inst_MUBUF__BUFFER_ATOMIC_ADD() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD - - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA; - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_SUB - ::Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_sub") - { - setFlag(AtomicSub); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SUB - - Inst_MUBUF__BUFFER_ATOMIC_SUB::~Inst_MUBUF__BUFFER_ATOMIC_SUB() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA; - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_SMIN - ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_smin") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SMIN - - Inst_MUBUF__BUFFER_ATOMIC_SMIN::~Inst_MUBUF__BUFFER_ATOMIC_SMIN() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_UMIN - ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_umin") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_UMIN - - Inst_MUBUF__BUFFER_ATOMIC_UMIN::~Inst_MUBUF__BUFFER_ATOMIC_UMIN() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_SMAX - ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_smax") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SMAX - - Inst_MUBUF__BUFFER_ATOMIC_SMAX::~Inst_MUBUF__BUFFER_ATOMIC_SMAX() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_UMAX - ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_umax") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } // if - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_UMAX - - Inst_MUBUF__BUFFER_ATOMIC_UMAX::~Inst_MUBUF__BUFFER_ATOMIC_UMAX() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_AND - ::Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_and") - { - setFlag(AtomicAnd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_AND - - Inst_MUBUF__BUFFER_ATOMIC_AND::~Inst_MUBUF__BUFFER_ATOMIC_AND() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_AND - - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA; - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_OR - ::Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_or") - { - setFlag(AtomicOr); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_OR - - Inst_MUBUF__BUFFER_ATOMIC_OR::~Inst_MUBUF__BUFFER_ATOMIC_OR() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_OR - - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA; - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_XOR - ::Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_xor") - { - setFlag(AtomicXor); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_XOR - - Inst_MUBUF__BUFFER_ATOMIC_XOR::~Inst_MUBUF__BUFFER_ATOMIC_XOR() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR - - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA; - // RETURN_DATA = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_INC - ::Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_inc") - { - setFlag(AtomicInc); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_INC - - Inst_MUBUF__BUFFER_ATOMIC_INC::~Inst_MUBUF__BUFFER_ATOMIC_INC() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_INC - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_DEC - ::Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_dec") - { - setFlag(AtomicDec); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_DEC - - Inst_MUBUF__BUFFER_ATOMIC_DEC::~Inst_MUBUF__BUFFER_ATOMIC_DEC() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 - // (unsigned compare); RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_swap_x2") - { - setFlag(AtomicExch); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 - - Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap_x2") - { - setFlag(AtomicCAS); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 - - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 - ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 - - // tmp = MEM[ADDR]; - // src = DATA[0:1]; - // cmp = DATA[2:3]; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_add_x2") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 - - Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_sub_x2") - { - setFlag(AtomicSub); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 - - Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_smin_x2") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 - - Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_umin_x2") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 - - Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? 
DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_smax_x2") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 - - Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_umax_x2") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 - - Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_AND_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_and_x2") - { - setFlag(AtomicAnd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_AND_X2 - - Inst_MUBUF__BUFFER_ATOMIC_AND_X2::~Inst_MUBUF__BUFFER_ATOMIC_AND_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_OR_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_or_x2") - { - setFlag(AtomicOr); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_OR_X2 - - Inst_MUBUF__BUFFER_ATOMIC_OR_X2::~Inst_MUBUF__BUFFER_ATOMIC_OR_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_xor_x2") - { - setFlag(AtomicXor); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 - - Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_INC_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_inc_x2") - { - setFlag(AtomicInc); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_INC_X2 - - Inst_MUBUF__BUFFER_ATOMIC_INC_X2::~Inst_MUBUF__BUFFER_ATOMIC_INC_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_dec_x2") - { - setFlag(AtomicDec); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 - - Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 - // (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_X(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_x") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_X - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X - - // Typed buffer load 1 dword with format conversion. 
- void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_xy") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY - - // Typed buffer load 2 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_xyz") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ - - // Typed buffer load 3 dwords with format conversion. 
- void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_xyzw") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW - ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW - - // Typed buffer load 4 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_MTBUF__TBUFFER_STORE_FORMAT_X - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_X(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_x") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_X - - Inst_MTBUF__TBUFFER_STORE_FORMAT_X::~Inst_MTBUF__TBUFFER_STORE_FORMAT_X() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_X - - // Typed buffer store 1 dword with format conversion. 
- void - Inst_MTBUF__TBUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XY(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_xy") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XY - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY - - // Typed buffer store 2 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_xyz") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ - ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ - - // Typed buffer store 3 dwords with format conversion. 
- void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_xyzw") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW - ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW - - // Typed buffer store 4 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_x") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X:: - ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X - - // Typed buffer load 1 dword with format conversion. 
- void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xy") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY - ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY - - // Typed buffer load 2 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ( - InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyz") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ - ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ - - // Typed buffer load 3 dwords with format conversion. 
- void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW( - InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyzw") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW - ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW - - // Typed buffer load 4 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_x") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X - ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X - - // Typed buffer store 1 dword with format conversion. 
- void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xy") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY - ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY - - // Typed buffer store 2 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyz") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ - ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ - - // Typed buffer store 3 dwords with format conversion. 
    // Remaining MTBUF D16 stores and the MIMG image-load family.
    // All are decoded (flags are set in the constructors) but execute()
    // panics: none of these instructions is implemented yet.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW

    // Typed buffer store 4 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::execute(
        GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MIMG__IMAGE_LOAD::Inst_MIMG__IMAGE_LOAD(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD

    Inst_MIMG__IMAGE_LOAD::~Inst_MIMG__IMAGE_LOAD()
    {
    } // ~Inst_MIMG__IMAGE_LOAD

    // Image memory load with format conversion specified
    void
    Inst_MIMG__IMAGE_LOAD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MIMG__IMAGE_LOAD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MIMG__IMAGE_LOAD_MIP::Inst_MIMG__IMAGE_LOAD_MIP(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_mip")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_MIP

    Inst_MIMG__IMAGE_LOAD_MIP::~Inst_MIMG__IMAGE_LOAD_MIP()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_MIP

    void
    Inst_MIMG__IMAGE_LOAD_MIP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MIMG__IMAGE_LOAD_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MIMG__IMAGE_LOAD_PCK::Inst_MIMG__IMAGE_LOAD_PCK(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_pck")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_PCK

    Inst_MIMG__IMAGE_LOAD_PCK::~Inst_MIMG__IMAGE_LOAD_PCK()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_PCK

    void
    Inst_MIMG__IMAGE_LOAD_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MIMG__IMAGE_LOAD_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MIMG__IMAGE_LOAD_PCK_SGN::Inst_MIMG__IMAGE_LOAD_PCK_SGN(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_pck_sgn")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_PCK_SGN

    Inst_MIMG__IMAGE_LOAD_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_PCK_SGN()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_PCK_SGN

    // Image memory load with no format conversion and sign extension
    void
    Inst_MIMG__IMAGE_LOAD_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MIMG__IMAGE_LOAD_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MIMG__IMAGE_LOAD_MIP_PCK::Inst_MIMG__IMAGE_LOAD_MIP_PCK(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_mip_pck")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_MIP_PCK
    // MIMG image stores, image_get_resinfo, and the first two image
    // atomics. All execute() bodies panic: unimplemented. For the atomics,
    // the instruction's GLC bit selects the AtomicReturn flag (result
    // written back) versus AtomicNoReturn.
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::~Inst_MIMG__IMAGE_LOAD_MIP_PCK()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK

    // Image memory load with user-supplied mip level, no format conversion
    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_mip_pck_sgn")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN

    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN

    // Image memory load with user-supplied mip level, no format conversion.
    // NOTE(review): comment matches MIP_PCK above; presumably this variant
    // also sign-extends (per its _sgn suffix) -- confirm against the ISA.
    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MIMG__IMAGE_STORE::Inst_MIMG__IMAGE_STORE(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE

    Inst_MIMG__IMAGE_STORE::~Inst_MIMG__IMAGE_STORE()
    {
    } // ~Inst_MIMG__IMAGE_STORE

    // Image memory store with format conversion specified
    void
    Inst_MIMG__IMAGE_STORE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MIMG__IMAGE_STORE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MIMG__IMAGE_STORE_MIP::Inst_MIMG__IMAGE_STORE_MIP(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store_mip")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE_MIP

    Inst_MIMG__IMAGE_STORE_MIP::~Inst_MIMG__IMAGE_STORE_MIP()
    {
    } // ~Inst_MIMG__IMAGE_STORE_MIP

    void
    Inst_MIMG__IMAGE_STORE_MIP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MIMG__IMAGE_STORE_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MIMG__IMAGE_STORE_PCK::Inst_MIMG__IMAGE_STORE_PCK(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store_pck")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE_PCK

    Inst_MIMG__IMAGE_STORE_PCK::~Inst_MIMG__IMAGE_STORE_PCK()
    {
    } // ~Inst_MIMG__IMAGE_STORE_PCK

    // Image memory store of packed data without format conversion.
    void
    Inst_MIMG__IMAGE_STORE_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MIMG__IMAGE_STORE_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MIMG__IMAGE_STORE_MIP_PCK::Inst_MIMG__IMAGE_STORE_MIP_PCK(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store_mip_pck")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE_MIP_PCK

    Inst_MIMG__IMAGE_STORE_MIP_PCK::~Inst_MIMG__IMAGE_STORE_MIP_PCK()
    {
    } // ~Inst_MIMG__IMAGE_STORE_MIP_PCK

    // Image memory store of packed data without format conversion
    void
    Inst_MIMG__IMAGE_STORE_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MIMG__IMAGE_STORE_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MIMG__IMAGE_GET_RESINFO::Inst_MIMG__IMAGE_GET_RESINFO(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_get_resinfo")
    {
        // NOTE(review): unlike the loads/stores above, no MemoryRef flag
        // is set here -- this instruction is not treated as a memory ref.
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GET_RESINFO

    Inst_MIMG__IMAGE_GET_RESINFO::~Inst_MIMG__IMAGE_GET_RESINFO()
    {
    } // ~Inst_MIMG__IMAGE_GET_RESINFO

    void
    Inst_MIMG__IMAGE_GET_RESINFO::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_SWAP::Inst_MIMG__IMAGE_ATOMIC_SWAP(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_swap")
    {
        setFlag(AtomicExch);
        // GLC bit selects whether the atomic returns the pre-op value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SWAP

    Inst_MIMG__IMAGE_ATOMIC_SWAP::~Inst_MIMG__IMAGE_ATOMIC_SWAP()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SWAP

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::Inst_MIMG__IMAGE_ATOMIC_CMPSWAP(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_cmpswap")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_CMPSWAP

    Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP

    // tmp = MEM[ADDR];
    // src = DATA[0];
    // cmp = DATA[1];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Image atomics: add, sub, smin. All unimplemented (execute panics);
    // constructors set the atomic-op flag and, via the GLC bit, the
    // AtomicReturn/AtomicNoReturn variant.
    void
    Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_ADD::Inst_MIMG__IMAGE_ATOMIC_ADD(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_add")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_ADD

    Inst_MIMG__IMAGE_ATOMIC_ADD::~Inst_MIMG__IMAGE_ATOMIC_ADD()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_ADD

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_SUB::Inst_MIMG__IMAGE_ATOMIC_SUB(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_sub")
    {
        setFlag(AtomicSub);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SUB

    Inst_MIMG__IMAGE_ATOMIC_SUB::~Inst_MIMG__IMAGE_ATOMIC_SUB()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SUB

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_SMIN::Inst_MIMG__IMAGE_ATOMIC_SMIN(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_smin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SMIN

    Inst_MIMG__IMAGE_ATOMIC_SMIN::~Inst_MIMG__IMAGE_ATOMIC_SMIN()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    // Image atomics: umin, smax, umax. All unimplemented (execute panics).
    // NOTE(review): both smin/umin map to AtomicMin and smax/umax to
    // AtomicMax here -- the signed/unsigned distinction is not encoded in
    // the flag; confirm downstream handling if these are ever implemented.
    void
    Inst_MIMG__IMAGE_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_UMIN::Inst_MIMG__IMAGE_ATOMIC_UMIN(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_umin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_UMIN

    Inst_MIMG__IMAGE_ATOMIC_UMIN::~Inst_MIMG__IMAGE_ATOMIC_UMIN()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_UMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_SMAX::Inst_MIMG__IMAGE_ATOMIC_SMAX(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_smax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SMAX

    Inst_MIMG__IMAGE_ATOMIC_SMAX::~Inst_MIMG__IMAGE_ATOMIC_SMAX()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_UMAX::Inst_MIMG__IMAGE_ATOMIC_UMAX(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_umax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_UMAX

    Inst_MIMG__IMAGE_ATOMIC_UMAX::~Inst_MIMG__IMAGE_ATOMIC_UMAX()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_UMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    // Image atomics: and, or, xor. All unimplemented (execute panics).
    void
    Inst_MIMG__IMAGE_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_AND::Inst_MIMG__IMAGE_ATOMIC_AND(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_and")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_AND

    Inst_MIMG__IMAGE_ATOMIC_AND::~Inst_MIMG__IMAGE_ATOMIC_AND()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_AND

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_OR::Inst_MIMG__IMAGE_ATOMIC_OR(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_or")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_OR

    Inst_MIMG__IMAGE_ATOMIC_OR::~Inst_MIMG__IMAGE_ATOMIC_OR()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_OR

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_XOR::Inst_MIMG__IMAGE_ATOMIC_XOR(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_xor")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_XOR

    Inst_MIMG__IMAGE_ATOMIC_XOR::~Inst_MIMG__IMAGE_ATOMIC_XOR()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_XOR

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    // Image atomics: inc, dec (wrapping increment/decrement against a
    // limit operand). All unimplemented (execute panics).
    void
    Inst_MIMG__IMAGE_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_INC::Inst_MIMG__IMAGE_ATOMIC_INC(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_inc")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_INC

    Inst_MIMG__IMAGE_ATOMIC_INC::~Inst_MIMG__IMAGE_ATOMIC_INC()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_INC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_DEC::Inst_MIMG__IMAGE_ATOMIC_DEC(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_dec")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_DEC

    Inst_MIMG__IMAGE_ATOMIC_DEC::~Inst_MIMG__IMAGE_ATOMIC_DEC()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_DEC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    // Image sample instructions. Each is decoded with only the
    // GlobalSegment flag (no MemoryRef/Load flag is set, unlike the image
    // loads above) and execute() panics: none is implemented.
    void
    Inst_MIMG__IMAGE_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE::Inst_MIMG__IMAGE_SAMPLE(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE

    Inst_MIMG__IMAGE_SAMPLE::~Inst_MIMG__IMAGE_SAMPLE()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE

    void
    Inst_MIMG__IMAGE_SAMPLE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_CL::Inst_MIMG__IMAGE_SAMPLE_CL(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CL

    Inst_MIMG__IMAGE_SAMPLE_CL::~Inst_MIMG__IMAGE_SAMPLE_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_D::Inst_MIMG__IMAGE_SAMPLE_D(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D

    Inst_MIMG__IMAGE_SAMPLE_D::~Inst_MIMG__IMAGE_SAMPLE_D()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D

    void
    Inst_MIMG__IMAGE_SAMPLE_D::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_D_CL::Inst_MIMG__IMAGE_SAMPLE_D_CL(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D_CL

    Inst_MIMG__IMAGE_SAMPLE_D_CL::~Inst_MIMG__IMAGE_SAMPLE_D_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_D_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_L::Inst_MIMG__IMAGE_SAMPLE_L(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_L

    Inst_MIMG__IMAGE_SAMPLE_L::~Inst_MIMG__IMAGE_SAMPLE_L()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_L

    void
    Inst_MIMG__IMAGE_SAMPLE_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_B::Inst_MIMG__IMAGE_SAMPLE_B(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B

    Inst_MIMG__IMAGE_SAMPLE_B::~Inst_MIMG__IMAGE_SAMPLE_B()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B

    void
    Inst_MIMG__IMAGE_SAMPLE_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_B_CL::Inst_MIMG__IMAGE_SAMPLE_B_CL(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B_CL

    Inst_MIMG__IMAGE_SAMPLE_B_CL::~Inst_MIMG__IMAGE_SAMPLE_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_LZ::Inst_MIMG__IMAGE_SAMPLE_LZ(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_LZ

    Inst_MIMG__IMAGE_SAMPLE_LZ::~Inst_MIMG__IMAGE_SAMPLE_LZ()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_LZ

    void
    Inst_MIMG__IMAGE_SAMPLE_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C::Inst_MIMG__IMAGE_SAMPLE_C(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C

    Inst_MIMG__IMAGE_SAMPLE_C::~Inst_MIMG__IMAGE_SAMPLE_C()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C

    void
    Inst_MIMG__IMAGE_SAMPLE_C::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_CL::Inst_MIMG__IMAGE_SAMPLE_C_CL(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CL

    Inst_MIMG__IMAGE_SAMPLE_C_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    // Image sample variants (depth-compare "_c", derivative "_d", clamp
    // "_cl", lod "_l"/"_lz", bias "_b", offset "_o" combinations). Same
    // pattern throughout: GlobalSegment flag only, execute() panics.
    Inst_MIMG__IMAGE_SAMPLE_C_D::Inst_MIMG__IMAGE_SAMPLE_C_D(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D

    Inst_MIMG__IMAGE_SAMPLE_C_D::~Inst_MIMG__IMAGE_SAMPLE_C_D()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL::Inst_MIMG__IMAGE_SAMPLE_C_D_CL(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_L::Inst_MIMG__IMAGE_SAMPLE_C_L(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_L

    Inst_MIMG__IMAGE_SAMPLE_C_L::~Inst_MIMG__IMAGE_SAMPLE_C_L()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_L

    void
    Inst_MIMG__IMAGE_SAMPLE_C_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_B::Inst_MIMG__IMAGE_SAMPLE_C_B(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B

    Inst_MIMG__IMAGE_SAMPLE_C_B::~Inst_MIMG__IMAGE_SAMPLE_C_B()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B

    void
    Inst_MIMG__IMAGE_SAMPLE_C_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL::Inst_MIMG__IMAGE_SAMPLE_C_B_CL(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_LZ::Inst_MIMG__IMAGE_SAMPLE_C_LZ(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_LZ

    Inst_MIMG__IMAGE_SAMPLE_C_LZ::~Inst_MIMG__IMAGE_SAMPLE_C_LZ()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ

    void
    Inst_MIMG__IMAGE_SAMPLE_C_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_O::Inst_MIMG__IMAGE_SAMPLE_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_O

    Inst_MIMG__IMAGE_SAMPLE_O::~Inst_MIMG__IMAGE_SAMPLE_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_O

    void
    Inst_MIMG__IMAGE_SAMPLE_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_CL_O::Inst_MIMG__IMAGE_SAMPLE_CL_O(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CL_O

    Inst_MIMG__IMAGE_SAMPLE_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_D_O::Inst_MIMG__IMAGE_SAMPLE_D_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D_O

    Inst_MIMG__IMAGE_SAMPLE_D_O::~Inst_MIMG__IMAGE_SAMPLE_D_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D_O

    void
    Inst_MIMG__IMAGE_SAMPLE_D_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_D_CL_O(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D_CL_O

    Inst_MIMG__IMAGE_SAMPLE_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_D_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_L_O::Inst_MIMG__IMAGE_SAMPLE_L_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_l_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_L_O

    Inst_MIMG__IMAGE_SAMPLE_L_O::~Inst_MIMG__IMAGE_SAMPLE_L_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_L_O

    void
    Inst_MIMG__IMAGE_SAMPLE_L_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_B_O::Inst_MIMG__IMAGE_SAMPLE_B_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B_O

    Inst_MIMG__IMAGE_SAMPLE_B_O::~Inst_MIMG__IMAGE_SAMPLE_B_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B_O

    void
    Inst_MIMG__IMAGE_SAMPLE_B_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_B_CL_O(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B_CL_O

    Inst_MIMG__IMAGE_SAMPLE_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_B_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_LZ_O::Inst_MIMG__IMAGE_SAMPLE_LZ_O(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_lz_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_LZ_O

    Inst_MIMG__IMAGE_SAMPLE_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_LZ_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_LZ_O

    void
    Inst_MIMG__IMAGE_SAMPLE_LZ_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_O::Inst_MIMG__IMAGE_SAMPLE_C_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_O

    Inst_MIMG__IMAGE_SAMPLE_C_O::~Inst_MIMG__IMAGE_SAMPLE_C_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CL_O(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CL_O

    Inst_MIMG__IMAGE_SAMPLE_C_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_D_O::Inst_MIMG__IMAGE_SAMPLE_C_D_O(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D_O

    Inst_MIMG__IMAGE_SAMPLE_C_D_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_L_O::Inst_MIMG__IMAGE_SAMPLE_C_L_O(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_l_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_L_O

    Inst_MIMG__IMAGE_SAMPLE_C_L_O::~Inst_MIMG__IMAGE_SAMPLE_C_L_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_L_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_L_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_B_O::Inst_MIMG__IMAGE_SAMPLE_C_B_O(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B_O

    Inst_MIMG__IMAGE_SAMPLE_C_B_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_O()
    {
    }
// ~Inst_MIMG__IMAGE_SAMPLE_C_B_O - - void - Inst_MIMG__IMAGE_SAMPLE_C_B_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_b_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O - - Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O - - void - Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::Inst_MIMG__IMAGE_SAMPLE_C_LZ_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_lz_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_LZ_O - - Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O - - void - Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4::Inst_MIMG__IMAGE_GATHER4(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4 - - Inst_MIMG__IMAGE_GATHER4::~Inst_MIMG__IMAGE_GATHER4() - { - } // ~Inst_MIMG__IMAGE_GATHER4 - - void - Inst_MIMG__IMAGE_GATHER4::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_CL::Inst_MIMG__IMAGE_GATHER4_CL(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_CL - - Inst_MIMG__IMAGE_GATHER4_CL::~Inst_MIMG__IMAGE_GATHER4_CL() - { - } // ~Inst_MIMG__IMAGE_GATHER4_CL - - void - Inst_MIMG__IMAGE_GATHER4_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_L::Inst_MIMG__IMAGE_GATHER4_L(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_l") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_L - - 
Inst_MIMG__IMAGE_GATHER4_L::~Inst_MIMG__IMAGE_GATHER4_L() - { - } // ~Inst_MIMG__IMAGE_GATHER4_L - - void - Inst_MIMG__IMAGE_GATHER4_L::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_B::Inst_MIMG__IMAGE_GATHER4_B(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_b") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_B - - Inst_MIMG__IMAGE_GATHER4_B::~Inst_MIMG__IMAGE_GATHER4_B() - { - } // ~Inst_MIMG__IMAGE_GATHER4_B - - void - Inst_MIMG__IMAGE_GATHER4_B::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_B_CL::Inst_MIMG__IMAGE_GATHER4_B_CL( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_b_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_B_CL - - Inst_MIMG__IMAGE_GATHER4_B_CL::~Inst_MIMG__IMAGE_GATHER4_B_CL() - { - } // ~Inst_MIMG__IMAGE_GATHER4_B_CL - - void - Inst_MIMG__IMAGE_GATHER4_B_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_LZ::Inst_MIMG__IMAGE_GATHER4_LZ(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_lz") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_LZ - - Inst_MIMG__IMAGE_GATHER4_LZ::~Inst_MIMG__IMAGE_GATHER4_LZ() - { - } // ~Inst_MIMG__IMAGE_GATHER4_LZ - - void - Inst_MIMG__IMAGE_GATHER4_LZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_C::Inst_MIMG__IMAGE_GATHER4_C(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C - - Inst_MIMG__IMAGE_GATHER4_C::~Inst_MIMG__IMAGE_GATHER4_C() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C - - void - Inst_MIMG__IMAGE_GATHER4_C::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_C_CL::Inst_MIMG__IMAGE_GATHER4_C_CL( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_CL - - 
Inst_MIMG__IMAGE_GATHER4_C_CL::~Inst_MIMG__IMAGE_GATHER4_C_CL() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_CL - - void - Inst_MIMG__IMAGE_GATHER4_C_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_C_L::Inst_MIMG__IMAGE_GATHER4_C_L( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_l") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_L - - Inst_MIMG__IMAGE_GATHER4_C_L::~Inst_MIMG__IMAGE_GATHER4_C_L() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_L - - void - Inst_MIMG__IMAGE_GATHER4_C_L::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_C_B::Inst_MIMG__IMAGE_GATHER4_C_B( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_b") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_B - - Inst_MIMG__IMAGE_GATHER4_C_B::~Inst_MIMG__IMAGE_GATHER4_C_B() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_B - - void - Inst_MIMG__IMAGE_GATHER4_C_B::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_C_B_CL::Inst_MIMG__IMAGE_GATHER4_C_B_CL( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_b_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_B_CL - - Inst_MIMG__IMAGE_GATHER4_C_B_CL::~Inst_MIMG__IMAGE_GATHER4_C_B_CL() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL - - void - Inst_MIMG__IMAGE_GATHER4_C_B_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_C_LZ::Inst_MIMG__IMAGE_GATHER4_C_LZ( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_lz") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_LZ - - Inst_MIMG__IMAGE_GATHER4_C_LZ::~Inst_MIMG__IMAGE_GATHER4_C_LZ() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ - - void - Inst_MIMG__IMAGE_GATHER4_C_LZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_O::Inst_MIMG__IMAGE_GATHER4_O(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_o") - { - 
setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_O - - Inst_MIMG__IMAGE_GATHER4_O::~Inst_MIMG__IMAGE_GATHER4_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_O - - void - Inst_MIMG__IMAGE_GATHER4_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_CL_O::Inst_MIMG__IMAGE_GATHER4_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_CL_O - - Inst_MIMG__IMAGE_GATHER4_CL_O::~Inst_MIMG__IMAGE_GATHER4_CL_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_CL_O - - void - Inst_MIMG__IMAGE_GATHER4_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_L_O::Inst_MIMG__IMAGE_GATHER4_L_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_l_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_L_O - - Inst_MIMG__IMAGE_GATHER4_L_O::~Inst_MIMG__IMAGE_GATHER4_L_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_L_O - - void - Inst_MIMG__IMAGE_GATHER4_L_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_B_O::Inst_MIMG__IMAGE_GATHER4_B_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_b_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_B_O - - Inst_MIMG__IMAGE_GATHER4_B_O::~Inst_MIMG__IMAGE_GATHER4_B_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_B_O - - void - Inst_MIMG__IMAGE_GATHER4_B_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_B_CL_O::Inst_MIMG__IMAGE_GATHER4_B_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_b_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_B_CL_O - - Inst_MIMG__IMAGE_GATHER4_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_B_CL_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_B_CL_O - - void - Inst_MIMG__IMAGE_GATHER4_B_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_LZ_O::Inst_MIMG__IMAGE_GATHER4_LZ_O( - InFmt_MIMG 
*iFmt) - : Inst_MIMG(iFmt, "image_gather4_lz_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_LZ_O - - Inst_MIMG__IMAGE_GATHER4_LZ_O::~Inst_MIMG__IMAGE_GATHER4_LZ_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_LZ_O - - void - Inst_MIMG__IMAGE_GATHER4_LZ_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_C_O::Inst_MIMG__IMAGE_GATHER4_C_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_O - - Inst_MIMG__IMAGE_GATHER4_C_O::~Inst_MIMG__IMAGE_GATHER4_C_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_O - - void - Inst_MIMG__IMAGE_GATHER4_C_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_C_CL_O::Inst_MIMG__IMAGE_GATHER4_C_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_CL_O - - Inst_MIMG__IMAGE_GATHER4_C_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_CL_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_CL_O - - void - Inst_MIMG__IMAGE_GATHER4_C_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_C_L_O::Inst_MIMG__IMAGE_GATHER4_C_L_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_l_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_L_O - - Inst_MIMG__IMAGE_GATHER4_C_L_O::~Inst_MIMG__IMAGE_GATHER4_C_L_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_L_O - - void - Inst_MIMG__IMAGE_GATHER4_C_L_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_C_B_O::Inst_MIMG__IMAGE_GATHER4_C_B_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_b_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_B_O - - Inst_MIMG__IMAGE_GATHER4_C_B_O::~Inst_MIMG__IMAGE_GATHER4_C_B_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_B_O - - void - Inst_MIMG__IMAGE_GATHER4_C_B_O::execute(GPUDynInstPtr gpuDynInst) - { - 
panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::Inst_MIMG__IMAGE_GATHER4_C_B_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_b_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_B_CL_O - - Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O - - void - Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GATHER4_C_LZ_O::Inst_MIMG__IMAGE_GATHER4_C_LZ_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_gather4_c_lz_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GATHER4_C_LZ_O - - Inst_MIMG__IMAGE_GATHER4_C_LZ_O::~Inst_MIMG__IMAGE_GATHER4_C_LZ_O() - { - } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ_O - - void - Inst_MIMG__IMAGE_GATHER4_C_LZ_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_GET_LOD::Inst_MIMG__IMAGE_GET_LOD(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_get_lod") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_GET_LOD - - Inst_MIMG__IMAGE_GET_LOD::~Inst_MIMG__IMAGE_GET_LOD() - { - } // ~Inst_MIMG__IMAGE_GET_LOD - - void - Inst_MIMG__IMAGE_GET_LOD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_SAMPLE_CD::Inst_MIMG__IMAGE_SAMPLE_CD(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_cd") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_CD - - Inst_MIMG__IMAGE_SAMPLE_CD::~Inst_MIMG__IMAGE_SAMPLE_CD() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_CD - - void - Inst_MIMG__IMAGE_SAMPLE_CD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_SAMPLE_CD_CL::Inst_MIMG__IMAGE_SAMPLE_CD_CL( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_cd_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_CD_CL - - Inst_MIMG__IMAGE_SAMPLE_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_CD_CL() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL - - void - 
Inst_MIMG__IMAGE_SAMPLE_CD_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_SAMPLE_C_CD::Inst_MIMG__IMAGE_SAMPLE_C_CD( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_cd") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_CD - - Inst_MIMG__IMAGE_SAMPLE_C_CD::~Inst_MIMG__IMAGE_SAMPLE_C_CD() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD - - void - Inst_MIMG__IMAGE_SAMPLE_C_CD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_cd_cl") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL - - Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL - - void - Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_SAMPLE_CD_O::Inst_MIMG__IMAGE_SAMPLE_CD_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_cd_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_CD_O - - Inst_MIMG__IMAGE_SAMPLE_CD_O::~Inst_MIMG__IMAGE_SAMPLE_CD_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_CD_O - - void - Inst_MIMG__IMAGE_SAMPLE_CD_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_CD_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_cd_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_CD_CL_O - - Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O - - void - Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_SAMPLE_C_CD_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_cd_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_CD_O - - 
Inst_MIMG__IMAGE_SAMPLE_C_CD_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_O - - void - Inst_MIMG__IMAGE_SAMPLE_C_CD_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_cd_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O - - Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O - - void - Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_EXP__EXP::Inst_EXP__EXP(InFmt_EXP *iFmt) - : Inst_EXP(iFmt, "exp") - { - } // Inst_EXP__EXP - - Inst_EXP__EXP::~Inst_EXP__EXP() - { - } // ~Inst_EXP__EXP - - void - Inst_EXP__EXP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_LOAD_UBYTE::Inst_FLAT__FLAT_LOAD_UBYTE(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_ubyte") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_UBYTE - - Inst_FLAT__FLAT_LOAD_UBYTE::~Inst_FLAT__FLAT_LOAD_UBYTE() - { - } // ~Inst_FLAT__FLAT_LOAD_UBYTE - - // Untyped buffer load unsigned byte (zero extend to VGPR destination). 
- void - Inst_FLAT__FLAT_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->rdGmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } // execute - - void - Inst_FLAT__FLAT_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (VecElemU32)((reinterpret_cast( - gpuDynInst->d_data))[lane]); - } - } - vdst.write(); - } // execute - // --- Inst_FLAT__FLAT_LOAD_SBYTE class methods --- - - Inst_FLAT__FLAT_LOAD_SBYTE::Inst_FLAT__FLAT_LOAD_SBYTE(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_sbyte") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_SBYTE - - Inst_FLAT__FLAT_LOAD_SBYTE::~Inst_FLAT__FLAT_LOAD_SBYTE() - { - } // ~Inst_FLAT__FLAT_LOAD_SBYTE - - // Untyped buffer load signed byte (sign extend to VGPR destination). 
- void - Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->rdGmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandI32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (VecElemI32)((reinterpret_cast( - gpuDynInst->d_data))[lane]); - } - } - vdst.write(); - } - - Inst_FLAT__FLAT_LOAD_USHORT::Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_ushort") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_USHORT - - Inst_FLAT__FLAT_LOAD_USHORT::~Inst_FLAT__FLAT_LOAD_USHORT() - { - } // ~Inst_FLAT__FLAT_LOAD_USHORT - - // Untyped buffer load unsigned short (zero extend to VGPR destination). 
- void - Inst_FLAT__FLAT_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->rdGmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (VecElemU32)((reinterpret_cast( - gpuDynInst->d_data))[lane]); - } - } - vdst.write(); - } - - - Inst_FLAT__FLAT_LOAD_SSHORT::Inst_FLAT__FLAT_LOAD_SSHORT(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_sshort") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_SSHORT - - Inst_FLAT__FLAT_LOAD_SSHORT::~Inst_FLAT__FLAT_LOAD_SSHORT() - { - } // ~Inst_FLAT__FLAT_LOAD_SSHORT - - // Untyped buffer load signed short (sign extend to VGPR destination). 
- void - Inst_FLAT__FLAT_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - void - Inst_FLAT__FLAT_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_FLAT__FLAT_LOAD_DWORD::Inst_FLAT__FLAT_LOAD_DWORD(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_dword") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_DWORD - - Inst_FLAT__FLAT_LOAD_DWORD::~Inst_FLAT__FLAT_LOAD_DWORD() - { - } // ~Inst_FLAT__FLAT_LOAD_DWORD - - // Untyped buffer load dword. - void - Inst_FLAT__FLAT_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->rdGmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - vdst.write(); - } // completeAcc - - 
Inst_FLAT__FLAT_LOAD_DWORDX2::Inst_FLAT__FLAT_LOAD_DWORDX2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_dwordx2") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_DWORDX2 - - Inst_FLAT__FLAT_LOAD_DWORDX2::~Inst_FLAT__FLAT_LOAD_DWORDX2() - { - } // ~Inst_FLAT__FLAT_LOAD_DWORDX2 - - // Untyped buffer load 2 dwords. - void - Inst_FLAT__FLAT_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->rdGmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU64 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - vdst.write(); - } // completeAcc - - Inst_FLAT__FLAT_LOAD_DWORDX3::Inst_FLAT__FLAT_LOAD_DWORDX3( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_dwordx3") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_DWORDX3 - - Inst_FLAT__FLAT_LOAD_DWORDX3::~Inst_FLAT__FLAT_LOAD_DWORDX3() - { - } // 
~Inst_FLAT__FLAT_LOAD_DWORDX3 - - // Untyped buffer load 3 dwords. - void - Inst_FLAT__FLAT_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->rdGmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<3>(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 2]; - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - } // completeAcc - - Inst_FLAT__FLAT_LOAD_DWORDX4::Inst_FLAT__FLAT_LOAD_DWORDX4( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_dwordx4") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_DWORDX4 - - Inst_FLAT__FLAT_LOAD_DWORDX4::~Inst_FLAT__FLAT_LOAD_DWORDX4() - { - } // 
~Inst_FLAT__FLAT_LOAD_DWORDX4 - - // Untyped buffer load 4 dwords. - void - Inst_FLAT__FLAT_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->rdGmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<4>(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); - VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2]; - vdst3[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 3]; - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - vdst3.write(); - } // completeAcc - - Inst_FLAT__FLAT_STORE_BYTE::Inst_FLAT__FLAT_STORE_BYTE(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_byte") - { - 
setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_BYTE - - Inst_FLAT__FLAT_STORE_BYTE::~Inst_FLAT__FLAT_STORE_BYTE() - { - } // ~Inst_FLAT__FLAT_STORE_BYTE - - // Untyped buffer store byte. - void - Inst_FLAT__FLAT_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->wrLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU8 data(gpuDynInst, extData.DATA); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } // execute - - void - Inst_FLAT__FLAT_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - - Inst_FLAT__FLAT_STORE_SHORT::Inst_FLAT__FLAT_STORE_SHORT(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_short") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_SHORT - - Inst_FLAT__FLAT_STORE_SHORT::~Inst_FLAT__FLAT_STORE_SHORT() - { - } // ~Inst_FLAT__FLAT_STORE_SHORT - - // Untyped buffer store short. 
- void - Inst_FLAT__FLAT_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->wrLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU16 data(gpuDynInst, extData.DATA); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_FLAT__FLAT_STORE_DWORD::Inst_FLAT__FLAT_STORE_DWORD(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_dword") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_DWORD - - Inst_FLAT__FLAT_STORE_DWORD::~Inst_FLAT__FLAT_STORE_DWORD() - { - } // ~Inst_FLAT__FLAT_STORE_DWORD - - // Untyped buffer store dword. 
- void - Inst_FLAT__FLAT_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->wrLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_FLAT__FLAT_STORE_DWORDX2::Inst_FLAT__FLAT_STORE_DWORDX2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_dwordx2") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_DWORDX2 - - Inst_FLAT__FLAT_STORE_DWORDX2::~Inst_FLAT__FLAT_STORE_DWORDX2() - { - } // ~Inst_FLAT__FLAT_STORE_DWORDX2 - - // Untyped buffer store 2 dwords. 
- void - Inst_FLAT__FLAT_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->wrLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data(gpuDynInst, extData.DATA); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_FLAT__FLAT_STORE_DWORDX3::Inst_FLAT__FLAT_STORE_DWORDX3( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_dwordx3") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_DWORDX3 - - Inst_FLAT__FLAT_STORE_DWORDX3::~Inst_FLAT__FLAT_STORE_DWORDX3() - { - } // ~Inst_FLAT__FLAT_STORE_DWORDX3 - - // Untyped buffer store 3 dwords. 
- void - Inst_FLAT__FLAT_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->wrLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data0(gpuDynInst, extData.DATA); - ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2); - - addr.read(); - data0.read(); - data1.read(); - data2.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 3] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 1] = data1[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 2] = data2[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<3>(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_FLAT__FLAT_STORE_DWORDX4::Inst_FLAT__FLAT_STORE_DWORDX4( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_dwordx4") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_DWORDX4 - - Inst_FLAT__FLAT_STORE_DWORDX4::~Inst_FLAT__FLAT_STORE_DWORDX4() - { - } // ~Inst_FLAT__FLAT_STORE_DWORDX4 - - // Untyped 
buffer store 4 dwords. - void - Inst_FLAT__FLAT_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->wrLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data0(gpuDynInst, extData.DATA); - ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2); - ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3); - - addr.read(); - data0.read(); - data1.read(); - data2.read(); - data3.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 3] = data3[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe - .issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<4>(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - - Inst_FLAT__FLAT_ATOMIC_SWAP::Inst_FLAT__FLAT_ATOMIC_SWAP(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_swap") - { - setFlag(AtomicExch); - if 
(instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } // if - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SWAP - - Inst_FLAT__FLAT_ATOMIC_SWAP::~Inst_FLAT__FLAT_ATOMIC_SWAP() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SWAP - - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA; - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->rdGmReqsInPipe--; - wf->wrLmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL || - gpuDynInst->executedAs() == enums::SC_PRIVATE) { - // TODO: additional address computation required for scratch - panic_if(gpuDynInst->executedAs() == enums::SC_PRIVATE, - "Flats to private aperture not tested yet\n"); - gpuDynInst->computeUnit()->globalMemoryPipe. 
- issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - data.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_SWAP::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_SWAP::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - - // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods --- - - Inst_FLAT__FLAT_ATOMIC_CMPSWAP - ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_cmpswap") - { - setFlag(AtomicCAS); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } // if - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP - - Inst_FLAT__FLAT_ATOMIC_CMPSWAP::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP() - { - } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP - - // tmp = MEM[ADDR]; - // src = DATA[0]; - // cmp = DATA[1]; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->rdGmReqsInPipe--; - wf->wrLmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1); - - addr.read(); - data.read(); - cmp.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->x_data))[lane] - = data[lane]; - (reinterpret_cast(gpuDynInst->a_data))[lane] - = cmp[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL || - gpuDynInst->executedAs() == enums::SC_PRIVATE) { - /** - * TODO: If you encounter this panic, just remove this panic - * and restart the simulation. It should just work fine but - * this is to warn user that this path is never tested although - * all the necessary logic is implemented - */ - panic_if(gpuDynInst->executedAs() == enums::SC_PRIVATE, - "Flats to private aperture not tested yet\n"); - gpuDynInst->computeUnit()->globalMemoryPipe. 
- issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - - Inst_FLAT__FLAT_ATOMIC_ADD::Inst_FLAT__FLAT_ATOMIC_ADD(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_add") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } // if - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_ADD - - Inst_FLAT__FLAT_ATOMIC_ADD::~Inst_FLAT__FLAT_ATOMIC_ADD() - { - } // ~Inst_FLAT__FLAT_ATOMIC_ADD - - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA; - // RETURN_DATA = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->rdGmReqsInPipe--; - wf->wrLmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe. - issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_ATOMIC_ADD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_ADD::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - - Inst_FLAT__FLAT_ATOMIC_SUB::Inst_FLAT__FLAT_ATOMIC_SUB(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_sub") - { - setFlag(AtomicSub); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } // if - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SUB - - 
Inst_FLAT__FLAT_ATOMIC_SUB::~Inst_FLAT__FLAT_ATOMIC_SUB() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SUB - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA; - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->rdGmReqsInPipe--; - wf->wrLmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe. 
- issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - void - Inst_FLAT__FLAT_ATOMIC_SUB::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_SUB::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - - Inst_FLAT__FLAT_ATOMIC_SMIN::Inst_FLAT__FLAT_ATOMIC_SMIN(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_smin") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SMIN - - Inst_FLAT__FLAT_ATOMIC_SMIN::~Inst_FLAT__FLAT_ATOMIC_SMIN() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SMIN - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_ATOMIC_UMIN::Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_umin") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_UMIN - - Inst_FLAT__FLAT_ATOMIC_UMIN::~Inst_FLAT__FLAT_ATOMIC_UMIN() - { - } // ~Inst_FLAT__FLAT_ATOMIC_UMIN - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_ATOMIC_SMAX::Inst_FLAT__FLAT_ATOMIC_SMAX(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_smax") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SMAX - - Inst_FLAT__FLAT_ATOMIC_SMAX::~Inst_FLAT__FLAT_ATOMIC_SMAX() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SMAX - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_ATOMIC_UMAX::Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_umax") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_UMAX - - Inst_FLAT__FLAT_ATOMIC_UMAX::~Inst_FLAT__FLAT_ATOMIC_UMAX() - { - } // ~Inst_FLAT__FLAT_ATOMIC_UMAX - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_ATOMIC_AND::Inst_FLAT__FLAT_ATOMIC_AND(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_and") - { - setFlag(AtomicAnd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_AND - - Inst_FLAT__FLAT_ATOMIC_AND::~Inst_FLAT__FLAT_ATOMIC_AND() - { - } // ~Inst_FLAT__FLAT_ATOMIC_AND - - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA; - // RETURN_DATA = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_ATOMIC_OR::Inst_FLAT__FLAT_ATOMIC_OR(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_or") - { - setFlag(AtomicOr); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_OR - - Inst_FLAT__FLAT_ATOMIC_OR::~Inst_FLAT__FLAT_ATOMIC_OR() - { - } // ~Inst_FLAT__FLAT_ATOMIC_OR - - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA; - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_ATOMIC_XOR::Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_xor") - { - setFlag(AtomicXor); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_XOR - - Inst_FLAT__FLAT_ATOMIC_XOR::~Inst_FLAT__FLAT_ATOMIC_XOR() - { - } // ~Inst_FLAT__FLAT_ATOMIC_XOR - - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA; - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_ATOMIC_INC::Inst_FLAT__FLAT_ATOMIC_INC(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_inc") - { - setFlag(AtomicInc); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_INC - - Inst_FLAT__FLAT_ATOMIC_INC::~Inst_FLAT__FLAT_ATOMIC_INC() - { - } // ~Inst_FLAT__FLAT_ATOMIC_INC - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->rdGmReqsInPipe--; - wf->wrLmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe. - issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_ATOMIC_INC::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_INC::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - - Inst_FLAT__FLAT_ATOMIC_DEC::Inst_FLAT__FLAT_ATOMIC_DEC(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_dec") - { - setFlag(AtomicDec); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_DEC - - 
Inst_FLAT__FLAT_ATOMIC_DEC::~Inst_FLAT__FLAT_ATOMIC_DEC() - { - } // ~Inst_FLAT__FLAT_ATOMIC_DEC - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 - // (unsigned compare); RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->rdGmReqsInPipe--; - wf->wrLmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe. 
- issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_ATOMIC_DEC::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_DEC::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - - Inst_FLAT__FLAT_ATOMIC_SWAP_X2::Inst_FLAT__FLAT_ATOMIC_SWAP_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_swap_x2") - { - setFlag(AtomicExch); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SWAP_X2 - - Inst_FLAT__FLAT_ATOMIC_SWAP_X2::~Inst_FLAT__FLAT_ATOMIC_SWAP_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SWAP_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_cmpswap_x2") - { - setFlag(AtomicCAS); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 - - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 - - // tmp = MEM[ADDR]; - // src = DATA[0:1]; - // cmp = DATA[2:3]; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->rdGmReqsInPipe--; - wf->wrLmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data(gpuDynInst, extData.DATA); - ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2); - - addr.read(); - data.read(); - cmp.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->x_data))[lane] - = data[lane]; - (reinterpret_cast(gpuDynInst->a_data))[lane] - = cmp[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL || - gpuDynInst->executedAs() == enums::SC_PRIVATE) { - /** - * TODO: If you encounter this panic, just remove this panic - * and restart the simulation. It should just work fine but - * this is to warn user that this path is never tested although - * all the necessary logic is implemented - */ - panic_if(gpuDynInst->executedAs() == enums::SC_PRIVATE, - "Flats to private aperture not tested yet\n"); - gpuDynInst->computeUnit()->globalMemoryPipe. 
- issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU64 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - - Inst_FLAT__FLAT_ATOMIC_ADD_X2::Inst_FLAT__FLAT_ATOMIC_ADD_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_add_x2") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_ADD_X2 - - Inst_FLAT__FLAT_ATOMIC_ADD_X2::~Inst_FLAT__FLAT_ATOMIC_ADD_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_ADD_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->rdGmReqsInPipe--; - wf->wrLmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data(gpuDynInst, extData.DATA); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe. - issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_ATOMIC_ADD_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_ADD_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU64 vdst(gpuDynInst, extData.VDST); - - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - - Inst_FLAT__FLAT_ATOMIC_SUB_X2::Inst_FLAT__FLAT_ATOMIC_SUB_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_sub_x2") - { - setFlag(AtomicSub); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SUB_X2 - - 
Inst_FLAT__FLAT_ATOMIC_SUB_X2::~Inst_FLAT__FLAT_ATOMIC_SUB_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SUB_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->rdGmReqsInPipe--; - wf->wrLmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data(gpuDynInst, extData.DATA); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe. 
- issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_ATOMIC_SUB_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_SUB_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU64 vdst(gpuDynInst, extData.VDST); - - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - - Inst_FLAT__FLAT_ATOMIC_SMIN_X2::Inst_FLAT__FLAT_ATOMIC_SMIN_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_smin_x2") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SMIN_X2 - - Inst_FLAT__FLAT_ATOMIC_SMIN_X2::~Inst_FLAT__FLAT_ATOMIC_SMIN_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SMIN_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_ATOMIC_UMIN_X2::Inst_FLAT__FLAT_ATOMIC_UMIN_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_umin_x2") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_UMIN_X2 - - Inst_FLAT__FLAT_ATOMIC_UMIN_X2::~Inst_FLAT__FLAT_ATOMIC_UMIN_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_UMIN_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_ATOMIC_SMAX_X2::Inst_FLAT__FLAT_ATOMIC_SMAX_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_smax_x2") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SMAX_X2 - - Inst_FLAT__FLAT_ATOMIC_SMAX_X2::~Inst_FLAT__FLAT_ATOMIC_SMAX_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SMAX_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_ATOMIC_UMAX_X2::Inst_FLAT__FLAT_ATOMIC_UMAX_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_umax_x2") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_UMAX_X2 - - Inst_FLAT__FLAT_ATOMIC_UMAX_X2::~Inst_FLAT__FLAT_ATOMIC_UMAX_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_UMAX_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_ATOMIC_AND_X2::Inst_FLAT__FLAT_ATOMIC_AND_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_and_x2") - { - setFlag(AtomicAnd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_AND_X2 - - Inst_FLAT__FLAT_ATOMIC_AND_X2::~Inst_FLAT__FLAT_ATOMIC_AND_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_AND_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_ATOMIC_OR_X2::Inst_FLAT__FLAT_ATOMIC_OR_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_or_x2") - { - setFlag(AtomicOr); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_OR_X2 - - Inst_FLAT__FLAT_ATOMIC_OR_X2::~Inst_FLAT__FLAT_ATOMIC_OR_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_OR_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_ATOMIC_XOR_X2::Inst_FLAT__FLAT_ATOMIC_XOR_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_xor_x2") - { - setFlag(AtomicXor); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_XOR_X2 - - Inst_FLAT__FLAT_ATOMIC_XOR_X2::~Inst_FLAT__FLAT_ATOMIC_XOR_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_XOR_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } - - Inst_FLAT__FLAT_ATOMIC_INC_X2::Inst_FLAT__FLAT_ATOMIC_INC_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_inc_x2") - { - setFlag(AtomicInc); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_INC_X2 - - Inst_FLAT__FLAT_ATOMIC_INC_X2::~Inst_FLAT__FLAT_ATOMIC_INC_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_INC_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->rdGmReqsInPipe--; - wf->wrLmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data(gpuDynInst, extData.DATA); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe. - issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_ATOMIC_INC_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_INC_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU64 vdst(gpuDynInst, extData.VDST); - - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast<VecElemU64*>( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - - Inst_FLAT__FLAT_ATOMIC_DEC_X2::Inst_FLAT__FLAT_ATOMIC_DEC_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_dec_x2") - { - setFlag(AtomicDec); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_DEC_X2 - - 
Inst_FLAT__FLAT_ATOMIC_DEC_X2::~Inst_FLAT__FLAT_ATOMIC_DEC_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_DEC_X2 - - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 - // (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - wf->wrGmReqsInPipe--; - wf->rdGmReqsInPipe--; - wf->wrLmReqsInPipe--; - wf->rdLmReqsInPipe--; - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data(gpuDynInst, extData.DATA); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe. 
- issueRequest(gpuDynInst); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - gpuDynInst->computeUnit()->localMemoryPipe - .issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } - - void - Inst_FLAT__FLAT_ATOMIC_DEC_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_DEC_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU64 vdst(gpuDynInst, extData.VDST); - - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast<VecElemU64*>( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc -} // namespace Gcn3ISA -} // namespace gem5 diff --git a/src/arch/amdgpu/gcn3/insts/instructions.hh b/src/arch/amdgpu/gcn3/insts/instructions.hh deleted file mode 100644 index d1b7ccb60e..0000000000 --- a/src/arch/amdgpu/gcn3/insts/instructions.hh +++ /dev/null @@ -1,42723 +0,0 @@ -/* - * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __ARCH_GCN3_INSTS_INSTRUCTIONS_HH__ -#define __ARCH_GCN3_INSTS_INSTRUCTIONS_HH__ - -#include "arch/amdgpu/gcn3/gpu_decoder.hh" -#include "arch/amdgpu/gcn3/insts/gpu_static_inst.hh" -#include "arch/amdgpu/gcn3/insts/op_encodings.hh" -#include "debug/GCN3.hh" - -namespace gem5 -{ - -namespace Gcn3ISA -{ - class Inst_SOP2__S_ADD_U32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_ADD_U32(InFmt_SOP2*); - ~Inst_SOP2__S_ADD_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_ADD_U32 - - class Inst_SOP2__S_SUB_U32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_SUB_U32(InFmt_SOP2*); - ~Inst_SOP2__S_SUB_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + 
numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_SUB_U32 - - class Inst_SOP2__S_ADD_I32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_ADD_I32(InFmt_SOP2*); - ~Inst_SOP2__S_ADD_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_ADD_I32 - - class Inst_SOP2__S_SUB_I32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_SUB_I32(InFmt_SOP2*); - ~Inst_SOP2__S_SUB_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_SUB_I32 - - class Inst_SOP2__S_ADDC_U32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_ADDC_U32(InFmt_SOP2*); - ~Inst_SOP2__S_ADDC_U32(); - - int - getNumOperands() override - { - return 
numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_ADDC_U32 - - class Inst_SOP2__S_SUBB_U32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_SUBB_U32(InFmt_SOP2*); - ~Inst_SOP2__S_SUBB_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_SUBB_U32 - - class Inst_SOP2__S_MIN_I32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_MIN_I32(InFmt_SOP2*); - ~Inst_SOP2__S_MIN_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_MIN_I32 - - class Inst_SOP2__S_MIN_U32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_MIN_U32(InFmt_SOP2*); - ~Inst_SOP2__S_MIN_U32(); - - int - getNumOperands() override - { - 
return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_MIN_U32 - - class Inst_SOP2__S_MAX_I32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_MAX_I32(InFmt_SOP2*); - ~Inst_SOP2__S_MAX_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_MAX_I32 - - class Inst_SOP2__S_MAX_U32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_MAX_U32(InFmt_SOP2*); - ~Inst_SOP2__S_MAX_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_MAX_U32 - - class Inst_SOP2__S_CSELECT_B32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_CSELECT_B32(InFmt_SOP2*); - ~Inst_SOP2__S_CSELECT_B32(); - - int - getNumOperands() 
override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_CSELECT_B32 - - class Inst_SOP2__S_CSELECT_B64 : public Inst_SOP2 - { - public: - Inst_SOP2__S_CSELECT_B64(InFmt_SOP2*); - ~Inst_SOP2__S_CSELECT_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_CSELECT_B64 - - class Inst_SOP2__S_AND_B32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_AND_B32(InFmt_SOP2*); - ~Inst_SOP2__S_AND_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_AND_B32 - - class Inst_SOP2__S_AND_B64 : public Inst_SOP2 - { - public: - Inst_SOP2__S_AND_B64(InFmt_SOP2*); - ~Inst_SOP2__S_AND_B64(); - - 
int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_AND_B64 - - class Inst_SOP2__S_OR_B32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_OR_B32(InFmt_SOP2*); - ~Inst_SOP2__S_OR_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_OR_B32 - - class Inst_SOP2__S_OR_B64 : public Inst_SOP2 - { - public: - Inst_SOP2__S_OR_B64(InFmt_SOP2*); - ~Inst_SOP2__S_OR_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_OR_B64 - - class Inst_SOP2__S_XOR_B32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_XOR_B32(InFmt_SOP2*); - ~Inst_SOP2__S_XOR_B32(); - - int - 
getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_XOR_B32 - - class Inst_SOP2__S_XOR_B64 : public Inst_SOP2 - { - public: - Inst_SOP2__S_XOR_B64(InFmt_SOP2*); - ~Inst_SOP2__S_XOR_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_XOR_B64 - - class Inst_SOP2__S_ANDN2_B32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_ANDN2_B32(InFmt_SOP2*); - ~Inst_SOP2__S_ANDN2_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_ANDN2_B32 - - class Inst_SOP2__S_ANDN2_B64 : public Inst_SOP2 - { - public: - Inst_SOP2__S_ANDN2_B64(InFmt_SOP2*); - 
~Inst_SOP2__S_ANDN2_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_ANDN2_B64 - - class Inst_SOP2__S_ORN2_B32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_ORN2_B32(InFmt_SOP2*); - ~Inst_SOP2__S_ORN2_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_ORN2_B32 - - class Inst_SOP2__S_ORN2_B64 : public Inst_SOP2 - { - public: - Inst_SOP2__S_ORN2_B64(InFmt_SOP2*); - ~Inst_SOP2__S_ORN2_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_ORN2_B64 - - class Inst_SOP2__S_NAND_B32 : public Inst_SOP2 - { - public: - 
Inst_SOP2__S_NAND_B32(InFmt_SOP2*); - ~Inst_SOP2__S_NAND_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_NAND_B32 - - class Inst_SOP2__S_NAND_B64 : public Inst_SOP2 - { - public: - Inst_SOP2__S_NAND_B64(InFmt_SOP2*); - ~Inst_SOP2__S_NAND_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_NAND_B64 - - class Inst_SOP2__S_NOR_B32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_NOR_B32(InFmt_SOP2*); - ~Inst_SOP2__S_NOR_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_NOR_B32 - - class Inst_SOP2__S_NOR_B64 : public Inst_SOP2 - { - 
public: - Inst_SOP2__S_NOR_B64(InFmt_SOP2*); - ~Inst_SOP2__S_NOR_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_NOR_B64 - - class Inst_SOP2__S_XNOR_B32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_XNOR_B32(InFmt_SOP2*); - ~Inst_SOP2__S_XNOR_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_XNOR_B32 - - class Inst_SOP2__S_XNOR_B64 : public Inst_SOP2 - { - public: - Inst_SOP2__S_XNOR_B64(InFmt_SOP2*); - ~Inst_SOP2__S_XNOR_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_XNOR_B64 - - class Inst_SOP2__S_LSHL_B32 : public 
Inst_SOP2 - { - public: - Inst_SOP2__S_LSHL_B32(InFmt_SOP2*); - ~Inst_SOP2__S_LSHL_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_LSHL_B32 - - class Inst_SOP2__S_LSHL_B64 : public Inst_SOP2 - { - public: - Inst_SOP2__S_LSHL_B64(InFmt_SOP2*); - ~Inst_SOP2__S_LSHL_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_LSHL_B64 - - class Inst_SOP2__S_LSHR_B32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_LSHR_B32(InFmt_SOP2*); - ~Inst_SOP2__S_LSHR_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_LSHR_B32 - - class 
Inst_SOP2__S_LSHR_B64 : public Inst_SOP2 - { - public: - Inst_SOP2__S_LSHR_B64(InFmt_SOP2*); - ~Inst_SOP2__S_LSHR_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_LSHR_B64 - - class Inst_SOP2__S_ASHR_I32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_ASHR_I32(InFmt_SOP2*); - ~Inst_SOP2__S_ASHR_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_ASHR_I32 - - class Inst_SOP2__S_ASHR_I64 : public Inst_SOP2 - { - public: - Inst_SOP2__S_ASHR_I64(InFmt_SOP2*); - ~Inst_SOP2__S_ASHR_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // 
Inst_SOP2__S_ASHR_I64 - - class Inst_SOP2__S_BFM_B32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_BFM_B32(InFmt_SOP2*); - ~Inst_SOP2__S_BFM_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_BFM_B32 - - class Inst_SOP2__S_BFM_B64 : public Inst_SOP2 - { - public: - Inst_SOP2__S_BFM_B64(InFmt_SOP2*); - ~Inst_SOP2__S_BFM_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_BFM_B64 - - class Inst_SOP2__S_MUL_I32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_MUL_I32(InFmt_SOP2*); - ~Inst_SOP2__S_MUL_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; 
// Inst_SOP2__S_MUL_I32 - - class Inst_SOP2__S_BFE_U32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_BFE_U32(InFmt_SOP2*); - ~Inst_SOP2__S_BFE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_BFE_U32 - - class Inst_SOP2__S_BFE_I32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_BFE_I32(InFmt_SOP2*); - ~Inst_SOP2__S_BFE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_BFE_I32 - - class Inst_SOP2__S_BFE_U64 : public Inst_SOP2 - { - public: - Inst_SOP2__S_BFE_U64(InFmt_SOP2*); - ~Inst_SOP2__S_BFE_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - 
}; // Inst_SOP2__S_BFE_U64 - - class Inst_SOP2__S_BFE_I64 : public Inst_SOP2 - { - public: - Inst_SOP2__S_BFE_I64(InFmt_SOP2*); - ~Inst_SOP2__S_BFE_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_BFE_I64 - - class Inst_SOP2__S_CBRANCH_G_FORK : public Inst_SOP2 - { - public: - Inst_SOP2__S_CBRANCH_G_FORK(InFmt_SOP2*); - ~Inst_SOP2__S_CBRANCH_G_FORK(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 8; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_CBRANCH_G_FORK - - class Inst_SOP2__S_ABSDIFF_I32 : public Inst_SOP2 - { - public: - Inst_SOP2__S_ABSDIFF_I32(InFmt_SOP2*); - ~Inst_SOP2__S_ABSDIFF_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - 
- void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_ABSDIFF_I32 - - class Inst_SOP2__S_RFE_RESTORE_B64 : public Inst_SOP2 - { - public: - Inst_SOP2__S_RFE_RESTORE_B64(InFmt_SOP2*); - ~Inst_SOP2__S_RFE_RESTORE_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP2__S_RFE_RESTORE_B64 - - class Inst_SOPK__S_MOVK_I32 : public Inst_SOPK - { - public: - Inst_SOPK__S_MOVK_I32(InFmt_SOPK*); - ~Inst_SOPK__S_MOVK_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_MOVK_I32 - - class Inst_SOPK__S_CMOVK_I32 : public Inst_SOPK - { - public: - Inst_SOPK__S_CMOVK_I32(InFmt_SOPK*); - ~Inst_SOPK__S_CMOVK_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void 
execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_CMOVK_I32 - - class Inst_SOPK__S_CMPK_EQ_I32 : public Inst_SOPK - { - public: - Inst_SOPK__S_CMPK_EQ_I32(InFmt_SOPK*); - ~Inst_SOPK__S_CMPK_EQ_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_CMPK_EQ_I32 - - class Inst_SOPK__S_CMPK_LG_I32 : public Inst_SOPK - { - public: - Inst_SOPK__S_CMPK_LG_I32(InFmt_SOPK*); - ~Inst_SOPK__S_CMPK_LG_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_CMPK_LG_I32 - - class Inst_SOPK__S_CMPK_GT_I32 : public Inst_SOPK - { - public: - Inst_SOPK__S_CMPK_GT_I32(InFmt_SOPK*); - ~Inst_SOPK__S_CMPK_GT_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // 
Inst_SOPK__S_CMPK_GT_I32 - - class Inst_SOPK__S_CMPK_GE_I32 : public Inst_SOPK - { - public: - Inst_SOPK__S_CMPK_GE_I32(InFmt_SOPK*); - ~Inst_SOPK__S_CMPK_GE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_CMPK_GE_I32 - - class Inst_SOPK__S_CMPK_LT_I32 : public Inst_SOPK - { - public: - Inst_SOPK__S_CMPK_LT_I32(InFmt_SOPK*); - ~Inst_SOPK__S_CMPK_LT_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_CMPK_LT_I32 - - class Inst_SOPK__S_CMPK_LE_I32 : public Inst_SOPK - { - public: - Inst_SOPK__S_CMPK_LE_I32(InFmt_SOPK*); - ~Inst_SOPK__S_CMPK_LE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_CMPK_LE_I32 - - class 
Inst_SOPK__S_CMPK_EQ_U32 : public Inst_SOPK - { - public: - Inst_SOPK__S_CMPK_EQ_U32(InFmt_SOPK*); - ~Inst_SOPK__S_CMPK_EQ_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_CMPK_EQ_U32 - - class Inst_SOPK__S_CMPK_LG_U32 : public Inst_SOPK - { - public: - Inst_SOPK__S_CMPK_LG_U32(InFmt_SOPK*); - ~Inst_SOPK__S_CMPK_LG_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_CMPK_LG_U32 - - class Inst_SOPK__S_CMPK_GT_U32 : public Inst_SOPK - { - public: - Inst_SOPK__S_CMPK_GT_U32(InFmt_SOPK*); - ~Inst_SOPK__S_CMPK_GT_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_CMPK_GT_U32 - - class Inst_SOPK__S_CMPK_GE_U32 : public Inst_SOPK 
- { - public: - Inst_SOPK__S_CMPK_GE_U32(InFmt_SOPK*); - ~Inst_SOPK__S_CMPK_GE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_CMPK_GE_U32 - - class Inst_SOPK__S_CMPK_LT_U32 : public Inst_SOPK - { - public: - Inst_SOPK__S_CMPK_LT_U32(InFmt_SOPK*); - ~Inst_SOPK__S_CMPK_LT_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_CMPK_LT_U32 - - class Inst_SOPK__S_CMPK_LE_U32 : public Inst_SOPK - { - public: - Inst_SOPK__S_CMPK_LE_U32(InFmt_SOPK*); - ~Inst_SOPK__S_CMPK_LE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_CMPK_LE_U32 - - class Inst_SOPK__S_ADDK_I32 : public Inst_SOPK - { - public: - 
Inst_SOPK__S_ADDK_I32(InFmt_SOPK*); - ~Inst_SOPK__S_ADDK_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_ADDK_I32 - - class Inst_SOPK__S_MULK_I32 : public Inst_SOPK - { - public: - Inst_SOPK__S_MULK_I32(InFmt_SOPK*); - ~Inst_SOPK__S_MULK_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_MULK_I32 - - class Inst_SOPK__S_CBRANCH_I_FORK : public Inst_SOPK - { - public: - Inst_SOPK__S_CBRANCH_I_FORK(InFmt_SOPK*); - ~Inst_SOPK__S_CBRANCH_I_FORK(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sdst - return 8; - case 1: // - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_CBRANCH_I_FORK - - class Inst_SOPK__S_GETREG_B32 : public Inst_SOPK - { - public: - Inst_SOPK__S_GETREG_B32(InFmt_SOPK*); - 
~Inst_SOPK__S_GETREG_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_GETREG_B32 - - class Inst_SOPK__S_SETREG_B32 : public Inst_SOPK - { - public: - Inst_SOPK__S_SETREG_B32(InFmt_SOPK*); - ~Inst_SOPK__S_SETREG_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_SETREG_B32 - - class Inst_SOPK__S_SETREG_IMM32_B32 : public Inst_SOPK - { - public: - Inst_SOPK__S_SETREG_IMM32_B32(InFmt_SOPK*); - ~Inst_SOPK__S_SETREG_IMM32_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm32 - return 4; - case 1: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPK__S_SETREG_IMM32_B32 - - class Inst_SOP1__S_MOV_B32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_MOV_B32(InFmt_SOP1*); - ~Inst_SOP1__S_MOV_B32(); - - int - 
getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_MOV_B32 - - class Inst_SOP1__S_MOV_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_MOV_B64(InFmt_SOP1*); - ~Inst_SOP1__S_MOV_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_MOV_B64 - - class Inst_SOP1__S_CMOV_B32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_CMOV_B32(InFmt_SOP1*); - ~Inst_SOP1__S_CMOV_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_CMOV_B32 - - class Inst_SOP1__S_CMOV_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_CMOV_B64(InFmt_SOP1*); - ~Inst_SOP1__S_CMOV_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // 
getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_CMOV_B64 - - class Inst_SOP1__S_NOT_B32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_NOT_B32(InFmt_SOP1*); - ~Inst_SOP1__S_NOT_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_NOT_B32 - - class Inst_SOP1__S_NOT_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_NOT_B64(InFmt_SOP1*); - ~Inst_SOP1__S_NOT_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_NOT_B64 - - class Inst_SOP1__S_WQM_B32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_WQM_B32(InFmt_SOP1*); - ~Inst_SOP1__S_WQM_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() 
override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_WQM_B32 - - class Inst_SOP1__S_WQM_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_WQM_B64(InFmt_SOP1*); - ~Inst_SOP1__S_WQM_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_WQM_B64 - - class Inst_SOP1__S_BREV_B32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_BREV_B32(InFmt_SOP1*); - ~Inst_SOP1__S_BREV_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_BREV_B32 - - class Inst_SOP1__S_BREV_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_BREV_B64(InFmt_SOP1*); - ~Inst_SOP1__S_BREV_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { 
- case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_BREV_B64 - - class Inst_SOP1__S_BCNT0_I32_B32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_BCNT0_I32_B32(InFmt_SOP1*); - ~Inst_SOP1__S_BCNT0_I32_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_BCNT0_I32_B32 - - class Inst_SOP1__S_BCNT0_I32_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_BCNT0_I32_B64(InFmt_SOP1*); - ~Inst_SOP1__S_BCNT0_I32_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_BCNT0_I32_B64 - - class Inst_SOP1__S_BCNT1_I32_B32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_BCNT1_I32_B32(InFmt_SOP1*); - ~Inst_SOP1__S_BCNT1_I32_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - 
case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_BCNT1_I32_B32 - - class Inst_SOP1__S_BCNT1_I32_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_BCNT1_I32_B64(InFmt_SOP1*); - ~Inst_SOP1__S_BCNT1_I32_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_BCNT1_I32_B64 - - class Inst_SOP1__S_FF0_I32_B32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_FF0_I32_B32(InFmt_SOP1*); - ~Inst_SOP1__S_FF0_I32_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_FF0_I32_B32 - - class Inst_SOP1__S_FF0_I32_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_FF0_I32_B64(InFmt_SOP1*); - ~Inst_SOP1__S_FF0_I32_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 4; - default: - 
fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_FF0_I32_B64 - - class Inst_SOP1__S_FF1_I32_B32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_FF1_I32_B32(InFmt_SOP1*); - ~Inst_SOP1__S_FF1_I32_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_FF1_I32_B32 - - class Inst_SOP1__S_FF1_I32_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_FF1_I32_B64(InFmt_SOP1*); - ~Inst_SOP1__S_FF1_I32_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_FF1_I32_B64 - - class Inst_SOP1__S_FLBIT_I32_B32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_FLBIT_I32_B32(InFmt_SOP1*); - ~Inst_SOP1__S_FLBIT_I32_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); 
- return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_FLBIT_I32_B32 - - class Inst_SOP1__S_FLBIT_I32_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_FLBIT_I32_B64(InFmt_SOP1*); - ~Inst_SOP1__S_FLBIT_I32_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_FLBIT_I32_B64 - - class Inst_SOP1__S_FLBIT_I32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_FLBIT_I32(InFmt_SOP1*); - ~Inst_SOP1__S_FLBIT_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_FLBIT_I32 - - class Inst_SOP1__S_FLBIT_I32_I64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_FLBIT_I32_I64(InFmt_SOP1*); - ~Inst_SOP1__S_FLBIT_I32_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - 
- void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_FLBIT_I32_I64 - - class Inst_SOP1__S_SEXT_I32_I8 : public Inst_SOP1 - { - public: - Inst_SOP1__S_SEXT_I32_I8(InFmt_SOP1*); - ~Inst_SOP1__S_SEXT_I32_I8(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_SEXT_I32_I8 - - class Inst_SOP1__S_SEXT_I32_I16 : public Inst_SOP1 - { - public: - Inst_SOP1__S_SEXT_I32_I16(InFmt_SOP1*); - ~Inst_SOP1__S_SEXT_I32_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_SEXT_I32_I16 - - class Inst_SOP1__S_BITSET0_B32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_BITSET0_B32(InFmt_SOP1*); - ~Inst_SOP1__S_BITSET0_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - 
}; // Inst_SOP1__S_BITSET0_B32 - - class Inst_SOP1__S_BITSET0_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_BITSET0_B64(InFmt_SOP1*); - ~Inst_SOP1__S_BITSET0_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_BITSET0_B64 - - class Inst_SOP1__S_BITSET1_B32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_BITSET1_B32(InFmt_SOP1*); - ~Inst_SOP1__S_BITSET1_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_BITSET1_B32 - - class Inst_SOP1__S_BITSET1_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_BITSET1_B64(InFmt_SOP1*); - ~Inst_SOP1__S_BITSET1_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_BITSET1_B64 - - class 
Inst_SOP1__S_GETPC_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_GETPC_B64(InFmt_SOP1*); - ~Inst_SOP1__S_GETPC_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_GETPC_B64 - - class Inst_SOP1__S_SETPC_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_SETPC_B64(InFmt_SOP1*); - ~Inst_SOP1__S_SETPC_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_SETPC_B64 - - class Inst_SOP1__S_SWAPPC_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_SWAPPC_B64(InFmt_SOP1*); - ~Inst_SOP1__S_SWAPPC_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_SWAPPC_B64 - - class Inst_SOP1__S_RFE_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_RFE_B64(InFmt_SOP1*); - 
~Inst_SOP1__S_RFE_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_RFE_B64 - - class Inst_SOP1__S_AND_SAVEEXEC_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_AND_SAVEEXEC_B64(InFmt_SOP1*); - ~Inst_SOP1__S_AND_SAVEEXEC_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_AND_SAVEEXEC_B64 - - class Inst_SOP1__S_OR_SAVEEXEC_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_OR_SAVEEXEC_B64(InFmt_SOP1*); - ~Inst_SOP1__S_OR_SAVEEXEC_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_OR_SAVEEXEC_B64 - - class Inst_SOP1__S_XOR_SAVEEXEC_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_XOR_SAVEEXEC_B64(InFmt_SOP1*); - 
~Inst_SOP1__S_XOR_SAVEEXEC_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_XOR_SAVEEXEC_B64 - - class Inst_SOP1__S_ANDN2_SAVEEXEC_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_ANDN2_SAVEEXEC_B64(InFmt_SOP1*); - ~Inst_SOP1__S_ANDN2_SAVEEXEC_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_ANDN2_SAVEEXEC_B64 - - class Inst_SOP1__S_ORN2_SAVEEXEC_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_ORN2_SAVEEXEC_B64(InFmt_SOP1*); - ~Inst_SOP1__S_ORN2_SAVEEXEC_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_ORN2_SAVEEXEC_B64 - - class Inst_SOP1__S_NAND_SAVEEXEC_B64 : public Inst_SOP1 - { - public: - 
Inst_SOP1__S_NAND_SAVEEXEC_B64(InFmt_SOP1*); - ~Inst_SOP1__S_NAND_SAVEEXEC_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_NAND_SAVEEXEC_B64 - - class Inst_SOP1__S_NOR_SAVEEXEC_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_NOR_SAVEEXEC_B64(InFmt_SOP1*); - ~Inst_SOP1__S_NOR_SAVEEXEC_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_NOR_SAVEEXEC_B64 - - class Inst_SOP1__S_XNOR_SAVEEXEC_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_XNOR_SAVEEXEC_B64(InFmt_SOP1*); - ~Inst_SOP1__S_XNOR_SAVEEXEC_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_XNOR_SAVEEXEC_B64 - - class Inst_SOP1__S_QUADMASK_B32 : public Inst_SOP1 - 
{ - public: - Inst_SOP1__S_QUADMASK_B32(InFmt_SOP1*); - ~Inst_SOP1__S_QUADMASK_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_QUADMASK_B32 - - class Inst_SOP1__S_QUADMASK_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_QUADMASK_B64(InFmt_SOP1*); - ~Inst_SOP1__S_QUADMASK_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_QUADMASK_B64 - - class Inst_SOP1__S_MOVRELS_B32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_MOVRELS_B32(InFmt_SOP1*); - ~Inst_SOP1__S_MOVRELS_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_MOVRELS_B32 - - class Inst_SOP1__S_MOVRELS_B64 : public Inst_SOP1 - { - public: - 
Inst_SOP1__S_MOVRELS_B64(InFmt_SOP1*); - ~Inst_SOP1__S_MOVRELS_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sdst - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_MOVRELS_B64 - - class Inst_SOP1__S_MOVRELD_B32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_MOVRELD_B32(InFmt_SOP1*); - ~Inst_SOP1__S_MOVRELD_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_MOVRELD_B32 - - class Inst_SOP1__S_MOVRELD_B64 : public Inst_SOP1 - { - public: - Inst_SOP1__S_MOVRELD_B64(InFmt_SOP1*); - ~Inst_SOP1__S_MOVRELD_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 8; - case 1: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_MOVRELD_B64 - - class Inst_SOP1__S_CBRANCH_JOIN : public Inst_SOP1 - { - public: - Inst_SOP1__S_CBRANCH_JOIN(InFmt_SOP1*); - 
~Inst_SOP1__S_CBRANCH_JOIN(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_CBRANCH_JOIN - - class Inst_SOP1__S_ABS_I32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_ABS_I32(InFmt_SOP1*); - ~Inst_SOP1__S_ABS_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_ABS_I32 - - class Inst_SOP1__S_MOV_FED_B32 : public Inst_SOP1 - { - public: - Inst_SOP1__S_MOV_FED_B32(InFmt_SOP1*); - ~Inst_SOP1__S_MOV_FED_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_MOV_FED_B32 - - class Inst_SOP1__S_SET_GPR_IDX_IDX : public Inst_SOP1 - { - public: - Inst_SOP1__S_SET_GPR_IDX_IDX(InFmt_SOP1*); - ~Inst_SOP1__S_SET_GPR_IDX_IDX(); - - int - 
getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOP1__S_SET_GPR_IDX_IDX - - class Inst_SOPC__S_CMP_EQ_I32 : public Inst_SOPC - { - public: - Inst_SOPC__S_CMP_EQ_I32(InFmt_SOPC*); - ~Inst_SOPC__S_CMP_EQ_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_CMP_EQ_I32 - - class Inst_SOPC__S_CMP_LG_I32 : public Inst_SOPC - { - public: - Inst_SOPC__S_CMP_LG_I32(InFmt_SOPC*); - ~Inst_SOPC__S_CMP_LG_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_CMP_LG_I32 - - class Inst_SOPC__S_CMP_GT_I32 : public Inst_SOPC - { - public: - Inst_SOPC__S_CMP_GT_I32(InFmt_SOPC*); - ~Inst_SOPC__S_CMP_GT_I32(); - - int - getNumOperands() override - { - return 
numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_CMP_GT_I32 - - class Inst_SOPC__S_CMP_GE_I32 : public Inst_SOPC - { - public: - Inst_SOPC__S_CMP_GE_I32(InFmt_SOPC*); - ~Inst_SOPC__S_CMP_GE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_CMP_GE_I32 - - class Inst_SOPC__S_CMP_LT_I32 : public Inst_SOPC - { - public: - Inst_SOPC__S_CMP_LT_I32(InFmt_SOPC*); - ~Inst_SOPC__S_CMP_LT_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_CMP_LT_I32 - - class Inst_SOPC__S_CMP_LE_I32 : public Inst_SOPC - { - public: - Inst_SOPC__S_CMP_LE_I32(InFmt_SOPC*); - ~Inst_SOPC__S_CMP_LE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // 
getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_CMP_LE_I32 - - class Inst_SOPC__S_CMP_EQ_U32 : public Inst_SOPC - { - public: - Inst_SOPC__S_CMP_EQ_U32(InFmt_SOPC*); - ~Inst_SOPC__S_CMP_EQ_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_CMP_EQ_U32 - - class Inst_SOPC__S_CMP_LG_U32 : public Inst_SOPC - { - public: - Inst_SOPC__S_CMP_LG_U32(InFmt_SOPC*); - ~Inst_SOPC__S_CMP_LG_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_CMP_LG_U32 - - class Inst_SOPC__S_CMP_GT_U32 : public Inst_SOPC - { - public: - Inst_SOPC__S_CMP_GT_U32(InFmt_SOPC*); - ~Inst_SOPC__S_CMP_GT_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() 
override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_CMP_GT_U32 - - class Inst_SOPC__S_CMP_GE_U32 : public Inst_SOPC - { - public: - Inst_SOPC__S_CMP_GE_U32(InFmt_SOPC*); - ~Inst_SOPC__S_CMP_GE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_CMP_GE_U32 - - class Inst_SOPC__S_CMP_LT_U32 : public Inst_SOPC - { - public: - Inst_SOPC__S_CMP_LT_U32(InFmt_SOPC*); - ~Inst_SOPC__S_CMP_LT_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_CMP_LT_U32 - - class Inst_SOPC__S_CMP_LE_U32 : public Inst_SOPC - { - public: - Inst_SOPC__S_CMP_LE_U32(InFmt_SOPC*); - ~Inst_SOPC__S_CMP_LE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() 
override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_CMP_LE_U32 - - class Inst_SOPC__S_BITCMP0_B32 : public Inst_SOPC - { - public: - Inst_SOPC__S_BITCMP0_B32(InFmt_SOPC*); - ~Inst_SOPC__S_BITCMP0_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_BITCMP0_B32 - - class Inst_SOPC__S_BITCMP1_B32 : public Inst_SOPC - { - public: - Inst_SOPC__S_BITCMP1_B32(InFmt_SOPC*); - ~Inst_SOPC__S_BITCMP1_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_BITCMP1_B32 - - class Inst_SOPC__S_BITCMP0_B64 : public Inst_SOPC - { - public: - Inst_SOPC__S_BITCMP0_B64(InFmt_SOPC*); - ~Inst_SOPC__S_BITCMP0_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - 
getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_BITCMP0_B64 - - class Inst_SOPC__S_BITCMP1_B64 : public Inst_SOPC - { - public: - Inst_SOPC__S_BITCMP1_B64(InFmt_SOPC*); - ~Inst_SOPC__S_BITCMP1_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_BITCMP1_B64 - - class Inst_SOPC__S_SETVSKIP : public Inst_SOPC - { - public: - Inst_SOPC__S_SETVSKIP(InFmt_SOPC*); - ~Inst_SOPC__S_SETVSKIP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_SETVSKIP - - class Inst_SOPC__S_SET_GPR_IDX_ON : public Inst_SOPC - { - public: - Inst_SOPC__S_SET_GPR_IDX_ON(InFmt_SOPC*); - ~Inst_SOPC__S_SET_GPR_IDX_ON(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - 
switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //simm4 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_SET_GPR_IDX_ON - - class Inst_SOPC__S_CMP_EQ_U64 : public Inst_SOPC - { - public: - Inst_SOPC__S_CMP_EQ_U64(InFmt_SOPC*); - ~Inst_SOPC__S_CMP_EQ_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_CMP_EQ_U64 - - class Inst_SOPC__S_CMP_LG_U64 : public Inst_SOPC - { - public: - Inst_SOPC__S_CMP_LG_U64(InFmt_SOPC*); - ~Inst_SOPC__S_CMP_LG_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 8; - case 1: //ssrc_1 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPC__S_CMP_LG_U64 - - class Inst_SOPP__S_NOP : public Inst_SOPP - { - public: - Inst_SOPP__S_NOP(InFmt_SOPP*); - ~Inst_SOPP__S_NOP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - 
fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_NOP - - class Inst_SOPP__S_ENDPGM : public Inst_SOPP - { - public: - Inst_SOPP__S_ENDPGM(InFmt_SOPP*); - ~Inst_SOPP__S_ENDPGM(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_ENDPGM - - class Inst_SOPP__S_BRANCH : public Inst_SOPP - { - public: - Inst_SOPP__S_BRANCH(InFmt_SOPP*); - ~Inst_SOPP__S_BRANCH(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //label - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_BRANCH - - class Inst_SOPP__S_WAKEUP : public Inst_SOPP - { - public: - Inst_SOPP__S_WAKEUP(InFmt_SOPP*); - ~Inst_SOPP__S_WAKEUP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_WAKEUP - - class Inst_SOPP__S_CBRANCH_SCC0 : 
public Inst_SOPP - { - public: - Inst_SOPP__S_CBRANCH_SCC0(InFmt_SOPP*); - ~Inst_SOPP__S_CBRANCH_SCC0(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //label - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_CBRANCH_SCC0 - - class Inst_SOPP__S_CBRANCH_SCC1 : public Inst_SOPP - { - public: - Inst_SOPP__S_CBRANCH_SCC1(InFmt_SOPP*); - ~Inst_SOPP__S_CBRANCH_SCC1(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //label - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_CBRANCH_SCC1 - - class Inst_SOPP__S_CBRANCH_VCCZ : public Inst_SOPP - { - public: - Inst_SOPP__S_CBRANCH_VCCZ(InFmt_SOPP*); - ~Inst_SOPP__S_CBRANCH_VCCZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //label - return 2; - case 1: - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_CBRANCH_VCCZ - - class Inst_SOPP__S_CBRANCH_VCCNZ : public Inst_SOPP - { - public: - Inst_SOPP__S_CBRANCH_VCCNZ(InFmt_SOPP*); - 
~Inst_SOPP__S_CBRANCH_VCCNZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //label - return 2; - case 1: - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_CBRANCH_VCCNZ - - class Inst_SOPP__S_CBRANCH_EXECZ : public Inst_SOPP - { - public: - Inst_SOPP__S_CBRANCH_EXECZ(InFmt_SOPP*); - ~Inst_SOPP__S_CBRANCH_EXECZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //label - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_CBRANCH_EXECZ - - class Inst_SOPP__S_CBRANCH_EXECNZ : public Inst_SOPP - { - public: - Inst_SOPP__S_CBRANCH_EXECNZ(InFmt_SOPP*); - ~Inst_SOPP__S_CBRANCH_EXECNZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //label - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_CBRANCH_EXECNZ - - class Inst_SOPP__S_BARRIER : public Inst_SOPP - { - public: - Inst_SOPP__S_BARRIER(InFmt_SOPP*); - ~Inst_SOPP__S_BARRIER(); - - int - getNumOperands() override - { - return numDstRegOperands() + 
numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_BARRIER - - class Inst_SOPP__S_SETKILL : public Inst_SOPP - { - public: - Inst_SOPP__S_SETKILL(InFmt_SOPP*); - ~Inst_SOPP__S_SETKILL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_SETKILL - - class Inst_SOPP__S_WAITCNT : public Inst_SOPP - { - public: - Inst_SOPP__S_WAITCNT(InFmt_SOPP*); - ~Inst_SOPP__S_WAITCNT(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_WAITCNT - - class Inst_SOPP__S_SETHALT : public Inst_SOPP - { - public: - Inst_SOPP__S_SETHALT(InFmt_SOPP*); - ~Inst_SOPP__S_SETHALT(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) 
override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_SETHALT - - class Inst_SOPP__S_SLEEP : public Inst_SOPP - { - public: - Inst_SOPP__S_SLEEP(InFmt_SOPP*); - ~Inst_SOPP__S_SLEEP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_SLEEP - - class Inst_SOPP__S_SETPRIO : public Inst_SOPP - { - public: - Inst_SOPP__S_SETPRIO(InFmt_SOPP*); - ~Inst_SOPP__S_SETPRIO(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_SETPRIO - - class Inst_SOPP__S_SENDMSG : public Inst_SOPP - { - public: - Inst_SOPP__S_SENDMSG(InFmt_SOPP*); - ~Inst_SOPP__S_SENDMSG(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void 
execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_SENDMSG - - class Inst_SOPP__S_SENDMSGHALT : public Inst_SOPP - { - public: - Inst_SOPP__S_SENDMSGHALT(InFmt_SOPP*); - ~Inst_SOPP__S_SENDMSGHALT(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_SENDMSGHALT - - class Inst_SOPP__S_TRAP : public Inst_SOPP - { - public: - Inst_SOPP__S_TRAP(InFmt_SOPP*); - ~Inst_SOPP__S_TRAP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_TRAP - - class Inst_SOPP__S_ICACHE_INV : public Inst_SOPP - { - public: - Inst_SOPP__S_ICACHE_INV(InFmt_SOPP*); - ~Inst_SOPP__S_ICACHE_INV(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_ICACHE_INV - - class Inst_SOPP__S_INCPERFLEVEL : public Inst_SOPP - { - public: - 
Inst_SOPP__S_INCPERFLEVEL(InFmt_SOPP*); - ~Inst_SOPP__S_INCPERFLEVEL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_INCPERFLEVEL - - class Inst_SOPP__S_DECPERFLEVEL : public Inst_SOPP - { - public: - Inst_SOPP__S_DECPERFLEVEL(InFmt_SOPP*); - ~Inst_SOPP__S_DECPERFLEVEL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_DECPERFLEVEL - - class Inst_SOPP__S_TTRACEDATA : public Inst_SOPP - { - public: - Inst_SOPP__S_TTRACEDATA(InFmt_SOPP*); - ~Inst_SOPP__S_TTRACEDATA(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_TTRACEDATA - - class Inst_SOPP__S_CBRANCH_CDBGSYS : public Inst_SOPP - { - public: - Inst_SOPP__S_CBRANCH_CDBGSYS(InFmt_SOPP*); - ~Inst_SOPP__S_CBRANCH_CDBGSYS(); - - int - getNumOperands() override - { - 
return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //label - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_CBRANCH_CDBGSYS - - class Inst_SOPP__S_CBRANCH_CDBGUSER : public Inst_SOPP - { - public: - Inst_SOPP__S_CBRANCH_CDBGUSER(InFmt_SOPP*); - ~Inst_SOPP__S_CBRANCH_CDBGUSER(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //label - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_CBRANCH_CDBGUSER - - class Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER : public Inst_SOPP - { - public: - Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER(InFmt_SOPP*); - ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //label - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER - - class Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER : public Inst_SOPP - { - public: - Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(InFmt_SOPP*); - ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(); - - int - getNumOperands() override - { - return 
numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //label - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER - - class Inst_SOPP__S_ENDPGM_SAVED : public Inst_SOPP - { - public: - Inst_SOPP__S_ENDPGM_SAVED(InFmt_SOPP*); - ~Inst_SOPP__S_ENDPGM_SAVED(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_ENDPGM_SAVED - - class Inst_SOPP__S_SET_GPR_IDX_OFF : public Inst_SOPP - { - public: - Inst_SOPP__S_SET_GPR_IDX_OFF(InFmt_SOPP*); - ~Inst_SOPP__S_SET_GPR_IDX_OFF(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_SET_GPR_IDX_OFF - - class Inst_SOPP__S_SET_GPR_IDX_MODE : public Inst_SOPP - { - public: - Inst_SOPP__S_SET_GPR_IDX_MODE(InFmt_SOPP*); - ~Inst_SOPP__S_SET_GPR_IDX_MODE(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int 
numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //simm16 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SOPP__S_SET_GPR_IDX_MODE - - class Inst_SMEM__S_LOAD_DWORD : public Inst_SMEM - { - public: - Inst_SMEM__S_LOAD_DWORD(InFmt_SMEM*); - ~Inst_SMEM__S_LOAD_DWORD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_base - return 8; - case 1: //offset - return 4; - case 2: //sgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_LOAD_DWORD - - class Inst_SMEM__S_LOAD_DWORDX2 : public Inst_SMEM - { - public: - Inst_SMEM__S_LOAD_DWORDX2(InFmt_SMEM*); - ~Inst_SMEM__S_LOAD_DWORDX2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_base - return 8; - case 1: //offset - return 4; - case 2: //sgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_LOAD_DWORDX2 - - class Inst_SMEM__S_LOAD_DWORDX4 : public Inst_SMEM - { - public: - 
Inst_SMEM__S_LOAD_DWORDX4(InFmt_SMEM*); - ~Inst_SMEM__S_LOAD_DWORDX4(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_base - return 8; - case 1: //offset - return 4; - case 2: //sgpr_dst - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_LOAD_DWORDX4 - - class Inst_SMEM__S_LOAD_DWORDX8 : public Inst_SMEM - { - public: - Inst_SMEM__S_LOAD_DWORDX8(InFmt_SMEM*); - ~Inst_SMEM__S_LOAD_DWORDX8(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_base - return 8; - case 1: //offset - return 4; - case 2: //sgpr_dst - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_LOAD_DWORDX8 - - class Inst_SMEM__S_LOAD_DWORDX16 : public Inst_SMEM - { - public: - Inst_SMEM__S_LOAD_DWORDX16(InFmt_SMEM*); - ~Inst_SMEM__S_LOAD_DWORDX16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_base - return 8; - case 1: //offset - return 4; - case 2: 
//sgpr_dst - return 64; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_LOAD_DWORDX16 - - class Inst_SMEM__S_BUFFER_LOAD_DWORD : public Inst_SMEM - { - public: - Inst_SMEM__S_BUFFER_LOAD_DWORD(InFmt_SMEM*); - ~Inst_SMEM__S_BUFFER_LOAD_DWORD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_base - return 16; - case 1: //offset - return 4; - case 2: //sgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_BUFFER_LOAD_DWORD - - class Inst_SMEM__S_BUFFER_LOAD_DWORDX2 : public Inst_SMEM - { - public: - Inst_SMEM__S_BUFFER_LOAD_DWORDX2(InFmt_SMEM*); - ~Inst_SMEM__S_BUFFER_LOAD_DWORDX2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_base - return 16; - case 1: //offset - return 4; - case 2: //sgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_BUFFER_LOAD_DWORDX2 - - class Inst_SMEM__S_BUFFER_LOAD_DWORDX4 : public Inst_SMEM - { - public: - 
Inst_SMEM__S_BUFFER_LOAD_DWORDX4(InFmt_SMEM*); - ~Inst_SMEM__S_BUFFER_LOAD_DWORDX4(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_base - return 16; - case 1: //offset - return 4; - case 2: //sgpr_dst - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_BUFFER_LOAD_DWORDX4 - - class Inst_SMEM__S_BUFFER_LOAD_DWORDX8 : public Inst_SMEM - { - public: - Inst_SMEM__S_BUFFER_LOAD_DWORDX8(InFmt_SMEM*); - ~Inst_SMEM__S_BUFFER_LOAD_DWORDX8(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_base - return 16; - case 1: //offset - return 4; - case 2: //sgpr_dst - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_BUFFER_LOAD_DWORDX8 - - class Inst_SMEM__S_BUFFER_LOAD_DWORDX16 : public Inst_SMEM - { - public: - Inst_SMEM__S_BUFFER_LOAD_DWORDX16(InFmt_SMEM*); - ~Inst_SMEM__S_BUFFER_LOAD_DWORDX16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - 
case 0: //sgpr_base - return 16; - case 1: //offset - return 4; - case 2: //sgpr_dst - return 64; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_BUFFER_LOAD_DWORDX16 - - class Inst_SMEM__S_STORE_DWORD : public Inst_SMEM - { - public: - Inst_SMEM__S_STORE_DWORD(InFmt_SMEM*); - ~Inst_SMEM__S_STORE_DWORD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_data - return 4; - case 1: //sgpr_base - return 8; - case 2: //offset - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_STORE_DWORD - - class Inst_SMEM__S_STORE_DWORDX2 : public Inst_SMEM - { - public: - Inst_SMEM__S_STORE_DWORDX2(InFmt_SMEM*); - ~Inst_SMEM__S_STORE_DWORDX2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_data - return 8; - case 1: //sgpr_base - return 8; - case 2: //offset - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_STORE_DWORDX2 - - class Inst_SMEM__S_STORE_DWORDX4 : public Inst_SMEM - { - 
public: - Inst_SMEM__S_STORE_DWORDX4(InFmt_SMEM*); - ~Inst_SMEM__S_STORE_DWORDX4(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_data - return 16; - case 1: //sgpr_base - return 8; - case 2: //offset - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_STORE_DWORDX4 - - class Inst_SMEM__S_BUFFER_STORE_DWORD : public Inst_SMEM - { - public: - Inst_SMEM__S_BUFFER_STORE_DWORD(InFmt_SMEM*); - ~Inst_SMEM__S_BUFFER_STORE_DWORD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_data - return 4; - case 1: //sgpr_base - return 16; - case 2: //offset - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_BUFFER_STORE_DWORD - - class Inst_SMEM__S_BUFFER_STORE_DWORDX2 : public Inst_SMEM - { - public: - Inst_SMEM__S_BUFFER_STORE_DWORDX2(InFmt_SMEM*); - ~Inst_SMEM__S_BUFFER_STORE_DWORDX2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: 
//sgpr_data - return 8; - case 1: //sgpr_base - return 16; - case 2: //offset - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_BUFFER_STORE_DWORDX2 - - class Inst_SMEM__S_BUFFER_STORE_DWORDX4 : public Inst_SMEM - { - public: - Inst_SMEM__S_BUFFER_STORE_DWORDX4(InFmt_SMEM*); - ~Inst_SMEM__S_BUFFER_STORE_DWORDX4(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_data - return 16; - case 1: //sgpr_base - return 16; - case 2: //offset - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_SMEM__S_BUFFER_STORE_DWORDX4 - - class Inst_SMEM__S_DCACHE_INV : public Inst_SMEM - { - public: - Inst_SMEM__S_DCACHE_INV(InFmt_SMEM*); - ~Inst_SMEM__S_DCACHE_INV(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SMEM__S_DCACHE_INV - - class Inst_SMEM__S_DCACHE_WB : public Inst_SMEM - { - public: - Inst_SMEM__S_DCACHE_WB(InFmt_SMEM*); - ~Inst_SMEM__S_DCACHE_WB(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // 
getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SMEM__S_DCACHE_WB - - class Inst_SMEM__S_DCACHE_INV_VOL : public Inst_SMEM - { - public: - Inst_SMEM__S_DCACHE_INV_VOL(InFmt_SMEM*); - ~Inst_SMEM__S_DCACHE_INV_VOL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SMEM__S_DCACHE_INV_VOL - - class Inst_SMEM__S_DCACHE_WB_VOL : public Inst_SMEM - { - public: - Inst_SMEM__S_DCACHE_WB_VOL(InFmt_SMEM*); - ~Inst_SMEM__S_DCACHE_WB_VOL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SMEM__S_DCACHE_WB_VOL - - class Inst_SMEM__S_MEMTIME : public Inst_SMEM - { - public: - Inst_SMEM__S_MEMTIME(InFmt_SMEM*); - ~Inst_SMEM__S_MEMTIME(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_dst - return 8; - 
default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SMEM__S_MEMTIME - - class Inst_SMEM__S_MEMREALTIME : public Inst_SMEM - { - public: - Inst_SMEM__S_MEMREALTIME(InFmt_SMEM*); - ~Inst_SMEM__S_MEMREALTIME(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SMEM__S_MEMREALTIME - - class Inst_SMEM__S_ATC_PROBE : public Inst_SMEM - { - public: - Inst_SMEM__S_ATC_PROBE(InFmt_SMEM*); - ~Inst_SMEM__S_ATC_PROBE(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //perm_rwx - return 32; - case 1: //sgpr_base - return 8; - case 2: //offset - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SMEM__S_ATC_PROBE - - class Inst_SMEM__S_ATC_PROBE_BUFFER : public Inst_SMEM - { - public: - Inst_SMEM__S_ATC_PROBE_BUFFER(InFmt_SMEM*); - ~Inst_SMEM__S_ATC_PROBE_BUFFER(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //perm_rwx - return 32; - case 1: //sgpr_base - return 16; - case 2: //offset - 
return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_SMEM__S_ATC_PROBE_BUFFER - - class Inst_VOP2__V_CNDMASK_B32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2*); - ~Inst_VOP2__V_CNDMASK_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_CNDMASK_B32 - - class Inst_VOP2__V_ADD_F32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_ADD_F32(InFmt_VOP2*); - ~Inst_VOP2__V_ADD_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_ADD_F32 - - class Inst_VOP2__V_SUB_F32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_SUB_F32(InFmt_VOP2*); - ~Inst_VOP2__V_SUB_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; 
- case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_SUB_F32 - - class Inst_VOP2__V_SUBREV_F32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_SUBREV_F32(InFmt_VOP2*); - ~Inst_VOP2__V_SUBREV_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_SUBREV_F32 - - class Inst_VOP2__V_MUL_LEGACY_F32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2*); - ~Inst_VOP2__V_MUL_LEGACY_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MUL_LEGACY_F32 - - class Inst_VOP2__V_MUL_F32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MUL_F32(InFmt_VOP2*); - ~Inst_VOP2__V_MUL_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch 
(opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MUL_F32 - - class Inst_VOP2__V_MUL_I32_I24 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2*); - ~Inst_VOP2__V_MUL_I32_I24(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MUL_I32_I24 - - class Inst_VOP2__V_MUL_HI_I32_I24 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2*); - ~Inst_VOP2__V_MUL_HI_I32_I24(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MUL_HI_I32_I24 - - class Inst_VOP2__V_MUL_U32_U24 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2*); - ~Inst_VOP2__V_MUL_U32_U24(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - 
int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MUL_U32_U24 - - class Inst_VOP2__V_MUL_HI_U32_U24 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2*); - ~Inst_VOP2__V_MUL_HI_U32_U24(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MUL_HI_U32_U24 - - class Inst_VOP2__V_MIN_F32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MIN_F32(InFmt_VOP2*); - ~Inst_VOP2__V_MIN_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MIN_F32 - - class Inst_VOP2__V_MAX_F32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MAX_F32(InFmt_VOP2*); - ~Inst_VOP2__V_MAX_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() 
override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MAX_F32 - - class Inst_VOP2__V_MIN_I32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MIN_I32(InFmt_VOP2*); - ~Inst_VOP2__V_MIN_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MIN_I32 - - class Inst_VOP2__V_MAX_I32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MAX_I32(InFmt_VOP2*); - ~Inst_VOP2__V_MAX_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MAX_I32 - - class Inst_VOP2__V_MIN_U32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MIN_U32(InFmt_VOP2*); - ~Inst_VOP2__V_MIN_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() 
override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MIN_U32 - - class Inst_VOP2__V_MAX_U32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MAX_U32(InFmt_VOP2*); - ~Inst_VOP2__V_MAX_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MAX_U32 - - class Inst_VOP2__V_LSHRREV_B32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2*); - ~Inst_VOP2__V_LSHRREV_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_LSHRREV_B32 - - class Inst_VOP2__V_ASHRREV_I32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2*); - ~Inst_VOP2__V_ASHRREV_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - 
int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_ASHRREV_I32 - - class Inst_VOP2__V_LSHLREV_B32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2*); - ~Inst_VOP2__V_LSHLREV_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_LSHLREV_B32 - - class Inst_VOP2__V_AND_B32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_AND_B32(InFmt_VOP2*); - ~Inst_VOP2__V_AND_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_AND_B32 - - class Inst_VOP2__V_OR_B32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_OR_B32(InFmt_VOP2*); - ~Inst_VOP2__V_OR_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { 
return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_OR_B32 - - class Inst_VOP2__V_XOR_B32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_XOR_B32(InFmt_VOP2*); - ~Inst_VOP2__V_XOR_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_XOR_B32 - - class Inst_VOP2__V_MAC_F32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MAC_F32(InFmt_VOP2*); - ~Inst_VOP2__V_MAC_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MAC_F32 - - class Inst_VOP2__V_MADMK_F32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MADMK_F32(InFmt_VOP2*); - ~Inst_VOP2__V_MADMK_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { 
return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MADMK_F32 - - class Inst_VOP2__V_MADAK_F32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MADAK_F32(InFmt_VOP2*); - ~Inst_VOP2__V_MADAK_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MADAK_F32 - - class Inst_VOP2__V_ADD_U32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_ADD_U32(InFmt_VOP2*); - ~Inst_VOP2__V_ADD_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - case 3: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_ADD_U32 - - class Inst_VOP2__V_SUB_U32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_SUB_U32(InFmt_VOP2*); - ~Inst_VOP2__V_SUB_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - 
int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - case 3: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_SUB_U32 - - class Inst_VOP2__V_SUBREV_U32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_SUBREV_U32(InFmt_VOP2*); - ~Inst_VOP2__V_SUBREV_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - case 3: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_SUBREV_U32 - - class Inst_VOP2__V_ADDC_U32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_ADDC_U32(InFmt_VOP2*); - ~Inst_VOP2__V_ADDC_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - case 3: //vdst - return 4; - case 4: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_ADDC_U32 - - class Inst_VOP2__V_SUBB_U32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_SUBB_U32(InFmt_VOP2*); - 
~Inst_VOP2__V_SUBB_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - case 3: //vdst - return 4; - case 4: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_SUBB_U32 - - class Inst_VOP2__V_SUBBREV_U32 : public Inst_VOP2 - { - public: - Inst_VOP2__V_SUBBREV_U32(InFmt_VOP2*); - ~Inst_VOP2__V_SUBBREV_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - case 3: //vdst - return 4; - case 4: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_SUBBREV_U32 - - class Inst_VOP2__V_ADD_F16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_ADD_F16(InFmt_VOP2*); - ~Inst_VOP2__V_ADD_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; 
// Inst_VOP2__V_ADD_F16 - - class Inst_VOP2__V_SUB_F16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_SUB_F16(InFmt_VOP2*); - ~Inst_VOP2__V_SUB_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_SUB_F16 - - class Inst_VOP2__V_SUBREV_F16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_SUBREV_F16(InFmt_VOP2*); - ~Inst_VOP2__V_SUBREV_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_SUBREV_F16 - - class Inst_VOP2__V_MUL_F16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MUL_F16(InFmt_VOP2*); - ~Inst_VOP2__V_MUL_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) 
override; - }; // Inst_VOP2__V_MUL_F16 - - class Inst_VOP2__V_MAC_F16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MAC_F16(InFmt_VOP2*); - ~Inst_VOP2__V_MAC_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MAC_F16 - - class Inst_VOP2__V_MADMK_F16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MADMK_F16(InFmt_VOP2*); - ~Inst_VOP2__V_MADMK_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //src_2 - return 2; - case 3: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MADMK_F16 - - class Inst_VOP2__V_MADAK_F16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MADAK_F16(InFmt_VOP2*); - ~Inst_VOP2__V_MADAK_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //src_2 - return 2; - case 3: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - 
return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MADAK_F16 - - class Inst_VOP2__V_ADD_U16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_ADD_U16(InFmt_VOP2*); - ~Inst_VOP2__V_ADD_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_ADD_U16 - - class Inst_VOP2__V_SUB_U16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_SUB_U16(InFmt_VOP2*); - ~Inst_VOP2__V_SUB_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_SUB_U16 - - class Inst_VOP2__V_SUBREV_U16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_SUBREV_U16(InFmt_VOP2*); - ~Inst_VOP2__V_SUBREV_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", 
opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_SUBREV_U16 - - class Inst_VOP2__V_MUL_LO_U16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2*); - ~Inst_VOP2__V_MUL_LO_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MUL_LO_U16 - - class Inst_VOP2__V_LSHLREV_B16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2*); - ~Inst_VOP2__V_LSHLREV_B16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_LSHLREV_B16 - - class Inst_VOP2__V_LSHRREV_B16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2*); - ~Inst_VOP2__V_LSHRREV_B16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - 
default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_LSHRREV_B16 - - class Inst_VOP2__V_ASHRREV_I16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2*); - ~Inst_VOP2__V_ASHRREV_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_ASHRREV_I16 - - class Inst_VOP2__V_MAX_F16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MAX_F16(InFmt_VOP2*); - ~Inst_VOP2__V_MAX_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MAX_F16 - - class Inst_VOP2__V_MIN_F16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MIN_F16(InFmt_VOP2*); - ~Inst_VOP2__V_MIN_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: 
//vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MIN_F16 - - class Inst_VOP2__V_MAX_U16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MAX_U16(InFmt_VOP2*); - ~Inst_VOP2__V_MAX_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MAX_U16 - - class Inst_VOP2__V_MAX_I16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MAX_I16(InFmt_VOP2*); - ~Inst_VOP2__V_MAX_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MAX_I16 - - class Inst_VOP2__V_MIN_U16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MIN_U16(InFmt_VOP2*); - ~Inst_VOP2__V_MIN_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: 
//vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MIN_U16 - - class Inst_VOP2__V_MIN_I16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_MIN_I16(InFmt_VOP2*); - ~Inst_VOP2__V_MIN_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_MIN_I16 - - class Inst_VOP2__V_LDEXP_F16 : public Inst_VOP2 - { - public: - Inst_VOP2__V_LDEXP_F16(InFmt_VOP2*); - ~Inst_VOP2__V_LDEXP_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP2__V_LDEXP_F16 - - class Inst_VOP1__V_NOP : public Inst_VOP1 - { - public: - Inst_VOP1__V_NOP(InFmt_VOP1*); - ~Inst_VOP1__V_NOP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } 
// getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_NOP - - class Inst_VOP1__V_MOV_B32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_MOV_B32(InFmt_VOP1*); - ~Inst_VOP1__V_MOV_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_MOV_B32 - - class Inst_VOP1__V_READFIRSTLANE_B32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_READFIRSTLANE_B32(InFmt_VOP1*); - ~Inst_VOP1__V_READFIRSTLANE_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vsrc - return 4; - case 1: //sdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_READFIRSTLANE_B32 - - class Inst_VOP1__V_CVT_I32_F64 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_I32_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) 
override; - }; // Inst_VOP1__V_CVT_I32_F64 - - class Inst_VOP1__V_CVT_F64_I32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_F64_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_F64_I32 - - class Inst_VOP1__V_CVT_F32_I32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_F32_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_F32_I32 - - class Inst_VOP1__V_CVT_F32_U32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_F32_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_F32_U32 - - class 
Inst_VOP1__V_CVT_U32_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_U32_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_U32_F32 - - class Inst_VOP1__V_CVT_I32_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_I32_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_I32_F32 - - class Inst_VOP1__V_MOV_FED_B32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1*); - ~Inst_VOP1__V_MOV_FED_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_MOV_FED_B32 - - class Inst_VOP1__V_CVT_F16_F32 : public Inst_VOP1 - { - 
public: - Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_F16_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_F16_F32 - - class Inst_VOP1__V_CVT_F32_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_F32_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_F32_F16 - - class Inst_VOP1__V_CVT_RPI_I32_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_RPI_I32_F32(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_RPI_I32_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_RPI_I32_F32 - - class Inst_VOP1__V_CVT_FLR_I32_F32 : public Inst_VOP1 - { - public: - 
Inst_VOP1__V_CVT_FLR_I32_F32(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_FLR_I32_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_FLR_I32_F32 - - class Inst_VOP1__V_CVT_OFF_F32_I4 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_OFF_F32_I4(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_OFF_F32_I4 - - class Inst_VOP1__V_CVT_F32_F64 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_F32_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_F32_F64 - - class Inst_VOP1__V_CVT_F64_F32 : public Inst_VOP1 - { - public: - 
Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_F64_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_F64_F32 - - class Inst_VOP1__V_CVT_F32_UBYTE0 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_F32_UBYTE0(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_F32_UBYTE0 - - class Inst_VOP1__V_CVT_F32_UBYTE1 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_F32_UBYTE1(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_F32_UBYTE1 - - class Inst_VOP1__V_CVT_F32_UBYTE2 : public Inst_VOP1 - { - public: - 
Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_F32_UBYTE2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_F32_UBYTE2 - - class Inst_VOP1__V_CVT_F32_UBYTE3 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_F32_UBYTE3(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_F32_UBYTE3 - - class Inst_VOP1__V_CVT_U32_F64 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_U32_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_U32_F64 - - class Inst_VOP1__V_CVT_F64_U32 : public Inst_VOP1 - { - public: - 
Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_F64_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_F64_U32 - - class Inst_VOP1__V_TRUNC_F64 : public Inst_VOP1 - { - public: - Inst_VOP1__V_TRUNC_F64(InFmt_VOP1*); - ~Inst_VOP1__V_TRUNC_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_TRUNC_F64 - - class Inst_VOP1__V_CEIL_F64 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CEIL_F64(InFmt_VOP1*); - ~Inst_VOP1__V_CEIL_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CEIL_F64 - - class Inst_VOP1__V_RNDNE_F64 : public Inst_VOP1 - { - public: - Inst_VOP1__V_RNDNE_F64(InFmt_VOP1*); - ~Inst_VOP1__V_RNDNE_F64(); - - int 
- getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_RNDNE_F64 - - class Inst_VOP1__V_FLOOR_F64 : public Inst_VOP1 - { - public: - Inst_VOP1__V_FLOOR_F64(InFmt_VOP1*); - ~Inst_VOP1__V_FLOOR_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_FLOOR_F64 - - class Inst_VOP1__V_FRACT_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_FRACT_F32(InFmt_VOP1*); - ~Inst_VOP1__V_FRACT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_FRACT_F32 - - class Inst_VOP1__V_TRUNC_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_TRUNC_F32(InFmt_VOP1*); - ~Inst_VOP1__V_TRUNC_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + 
numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_TRUNC_F32 - - class Inst_VOP1__V_CEIL_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CEIL_F32(InFmt_VOP1*); - ~Inst_VOP1__V_CEIL_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CEIL_F32 - - class Inst_VOP1__V_RNDNE_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_RNDNE_F32(InFmt_VOP1*); - ~Inst_VOP1__V_RNDNE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_RNDNE_F32 - - class Inst_VOP1__V_FLOOR_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_FLOOR_F32(InFmt_VOP1*); - ~Inst_VOP1__V_FLOOR_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override 
{ return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_FLOOR_F32 - - class Inst_VOP1__V_EXP_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_EXP_F32(InFmt_VOP1*); - ~Inst_VOP1__V_EXP_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_EXP_F32 - - class Inst_VOP1__V_LOG_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_LOG_F32(InFmt_VOP1*); - ~Inst_VOP1__V_LOG_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_LOG_F32 - - class Inst_VOP1__V_RCP_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_RCP_F32(InFmt_VOP1*); - ~Inst_VOP1__V_RCP_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) 
override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_RCP_F32 - - class Inst_VOP1__V_RCP_IFLAG_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1*); - ~Inst_VOP1__V_RCP_IFLAG_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_RCP_IFLAG_F32 - - class Inst_VOP1__V_RSQ_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_RSQ_F32(InFmt_VOP1*); - ~Inst_VOP1__V_RSQ_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_RSQ_F32 - - class Inst_VOP1__V_RCP_F64 : public Inst_VOP1 - { - public: - Inst_VOP1__V_RCP_F64(InFmt_VOP1*); - ~Inst_VOP1__V_RCP_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst 
- return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_RCP_F64 - - class Inst_VOP1__V_RSQ_F64 : public Inst_VOP1 - { - public: - Inst_VOP1__V_RSQ_F64(InFmt_VOP1*); - ~Inst_VOP1__V_RSQ_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_RSQ_F64 - - class Inst_VOP1__V_SQRT_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_SQRT_F32(InFmt_VOP1*); - ~Inst_VOP1__V_SQRT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_SQRT_F32 - - class Inst_VOP1__V_SQRT_F64 : public Inst_VOP1 - { - public: - Inst_VOP1__V_SQRT_F64(InFmt_VOP1*); - ~Inst_VOP1__V_SQRT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // 
getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_SQRT_F64 - - class Inst_VOP1__V_SIN_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_SIN_F32(InFmt_VOP1*); - ~Inst_VOP1__V_SIN_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_SIN_F32 - - class Inst_VOP1__V_COS_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_COS_F32(InFmt_VOP1*); - ~Inst_VOP1__V_COS_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_COS_F32 - - class Inst_VOP1__V_NOT_B32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_NOT_B32(InFmt_VOP1*); - ~Inst_VOP1__V_NOT_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_NOT_B32 - - class 
Inst_VOP1__V_BFREV_B32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_BFREV_B32(InFmt_VOP1*); - ~Inst_VOP1__V_BFREV_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_BFREV_B32 - - class Inst_VOP1__V_FFBH_U32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_FFBH_U32(InFmt_VOP1*); - ~Inst_VOP1__V_FFBH_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_FFBH_U32 - - class Inst_VOP1__V_FFBL_B32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_FFBL_B32(InFmt_VOP1*); - ~Inst_VOP1__V_FFBL_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_FFBL_B32 - - class Inst_VOP1__V_FFBH_I32 : public Inst_VOP1 - { - public: - 
Inst_VOP1__V_FFBH_I32(InFmt_VOP1*); - ~Inst_VOP1__V_FFBH_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_FFBH_I32 - - class Inst_VOP1__V_FREXP_EXP_I32_F64 : public Inst_VOP1 - { - public: - Inst_VOP1__V_FREXP_EXP_I32_F64(InFmt_VOP1*); - ~Inst_VOP1__V_FREXP_EXP_I32_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_FREXP_EXP_I32_F64 - - class Inst_VOP1__V_FREXP_MANT_F64 : public Inst_VOP1 - { - public: - Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1*); - ~Inst_VOP1__V_FREXP_MANT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_FREXP_MANT_F64 - - class Inst_VOP1__V_FRACT_F64 : public Inst_VOP1 - { - public: - 
Inst_VOP1__V_FRACT_F64(InFmt_VOP1*); - ~Inst_VOP1__V_FRACT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_FRACT_F64 - - class Inst_VOP1__V_FREXP_EXP_I32_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_FREXP_EXP_I32_F32(InFmt_VOP1*); - ~Inst_VOP1__V_FREXP_EXP_I32_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_FREXP_EXP_I32_F32 - - class Inst_VOP1__V_FREXP_MANT_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1*); - ~Inst_VOP1__V_FREXP_MANT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_FREXP_MANT_F32 - - class Inst_VOP1__V_CLREXCP : public Inst_VOP1 - { - public: - 
Inst_VOP1__V_CLREXCP(InFmt_VOP1*); - ~Inst_VOP1__V_CLREXCP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CLREXCP - - class Inst_VOP1__V_CVT_F16_U16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_F16_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_F16_U16 - - class Inst_VOP1__V_CVT_F16_I16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_F16_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_F16_I16 - - class Inst_VOP1__V_CVT_U16_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_U16_F16(); - - int - getNumOperands() override - { - return 
numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_U16_F16 - - class Inst_VOP1__V_CVT_I16_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1*); - ~Inst_VOP1__V_CVT_I16_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CVT_I16_F16 - - class Inst_VOP1__V_RCP_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_RCP_F16(InFmt_VOP1*); - ~Inst_VOP1__V_RCP_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_RCP_F16 - - class Inst_VOP1__V_SQRT_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_SQRT_F16(InFmt_VOP1*); - ~Inst_VOP1__V_SQRT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int 
numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_SQRT_F16 - - class Inst_VOP1__V_RSQ_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_RSQ_F16(InFmt_VOP1*); - ~Inst_VOP1__V_RSQ_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_RSQ_F16 - - class Inst_VOP1__V_LOG_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_LOG_F16(InFmt_VOP1*); - ~Inst_VOP1__V_LOG_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_LOG_F16 - - class Inst_VOP1__V_EXP_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_EXP_F16(InFmt_VOP1*); - ~Inst_VOP1__V_EXP_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int 
- getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_EXP_F16 - - class Inst_VOP1__V_FREXP_MANT_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1*); - ~Inst_VOP1__V_FREXP_MANT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_FREXP_MANT_F16 - - class Inst_VOP1__V_FREXP_EXP_I16_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_FREXP_EXP_I16_F16(InFmt_VOP1*); - ~Inst_VOP1__V_FREXP_EXP_I16_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_FREXP_EXP_I16_F16 - - class Inst_VOP1__V_FLOOR_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_FLOOR_F16(InFmt_VOP1*); - ~Inst_VOP1__V_FLOOR_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int 
opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_FLOOR_F16 - - class Inst_VOP1__V_CEIL_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_CEIL_F16(InFmt_VOP1*); - ~Inst_VOP1__V_CEIL_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_CEIL_F16 - - class Inst_VOP1__V_TRUNC_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_TRUNC_F16(InFmt_VOP1*); - ~Inst_VOP1__V_TRUNC_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_TRUNC_F16 - - class Inst_VOP1__V_RNDNE_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_RNDNE_F16(InFmt_VOP1*); - ~Inst_VOP1__V_RNDNE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: 
//vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_RNDNE_F16 - - class Inst_VOP1__V_FRACT_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_FRACT_F16(InFmt_VOP1*); - ~Inst_VOP1__V_FRACT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_FRACT_F16 - - class Inst_VOP1__V_SIN_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_SIN_F16(InFmt_VOP1*); - ~Inst_VOP1__V_SIN_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_SIN_F16 - - class Inst_VOP1__V_COS_F16 : public Inst_VOP1 - { - public: - Inst_VOP1__V_COS_F16(InFmt_VOP1*); - ~Inst_VOP1__V_COS_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - 
} - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_COS_F16 - - class Inst_VOP1__V_EXP_LEGACY_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1*); - ~Inst_VOP1__V_EXP_LEGACY_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_EXP_LEGACY_F32 - - class Inst_VOP1__V_LOG_LEGACY_F32 : public Inst_VOP1 - { - public: - Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1*); - ~Inst_VOP1__V_LOG_LEGACY_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP1__V_LOG_LEGACY_F32 - - class Inst_VOPC__V_CMP_CLASS_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_CLASS_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_CLASS_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return 
-1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_CLASS_F32 - - class Inst_VOPC__V_CMPX_CLASS_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_CLASS_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_CLASS_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_CLASS_F32 - - class Inst_VOPC__V_CMP_CLASS_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_CLASS_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_CLASS_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_CLASS_F64 - - class Inst_VOPC__V_CMPX_CLASS_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_CLASS_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_CLASS_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 4; - case 2: //vcc - 
return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_CLASS_F64 - - class Inst_VOPC__V_CMP_CLASS_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_CLASS_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_CLASS_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_CLASS_F16 - - class Inst_VOPC__V_CMPX_CLASS_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_CLASS_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_CLASS_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_CLASS_F16 - - class Inst_VOPC__V_CMP_F_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_F_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_F_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - 
return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_F_F16 - - class Inst_VOPC__V_CMP_LT_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LT_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LT_F16 - - class Inst_VOPC__V_CMP_EQ_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_EQ_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_EQ_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_EQ_F16 - - class Inst_VOPC__V_CMP_LE_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LE_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch 
(opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LE_F16 - - class Inst_VOPC__V_CMP_GT_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GT_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GT_F16 - - class Inst_VOPC__V_CMP_LG_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LG_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LG_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LG_F16 - - class Inst_VOPC__V_CMP_GE_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GE_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int 
opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GE_F16 - - class Inst_VOPC__V_CMP_O_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_O_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_O_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_O_F16 - - class Inst_VOPC__V_CMP_U_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_U_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_U_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_U_F16 - - class Inst_VOPC__V_CMP_NGE_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NGE_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NGE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - 
int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NGE_F16 - - class Inst_VOPC__V_CMP_NLG_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NLG_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NLG_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NLG_F16 - - class Inst_VOPC__V_CMP_NGT_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NGT_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NGT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NGT_F16 - - class Inst_VOPC__V_CMP_NLE_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NLE_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NLE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int 
numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NLE_F16 - - class Inst_VOPC__V_CMP_NEQ_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NEQ_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NEQ_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NEQ_F16 - - class Inst_VOPC__V_CMP_NLT_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NLT_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NLT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NLT_F16 - - class Inst_VOPC__V_CMP_TRU_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_TRU_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_TRU_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int 
numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_TRU_F16 - - class Inst_VOPC__V_CMPX_F_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_F_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_F_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_F_F16 - - class Inst_VOPC__V_CMPX_LT_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LT_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LT_F16 - - class Inst_VOPC__V_CMPX_EQ_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_EQ_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_EQ_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + 
numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_EQ_F16 - - class Inst_VOPC__V_CMPX_LE_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LE_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LE_F16 - - class Inst_VOPC__V_CMPX_GT_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GT_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GT_F16 - - class Inst_VOPC__V_CMPX_LG_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LG_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LG_F16(); - - int - getNumOperands() 
override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LG_F16 - - class Inst_VOPC__V_CMPX_GE_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GE_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GE_F16 - - class Inst_VOPC__V_CMPX_O_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_O_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_O_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_O_F16 - - class Inst_VOPC__V_CMPX_U_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_U_F16(InFmt_VOPC*); - 
~Inst_VOPC__V_CMPX_U_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_U_F16 - - class Inst_VOPC__V_CMPX_NGE_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NGE_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NGE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NGE_F16 - - class Inst_VOPC__V_CMPX_NLG_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NLG_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NLG_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NLG_F16 - - class Inst_VOPC__V_CMPX_NGT_F16 : public Inst_VOPC - { - 
public: - Inst_VOPC__V_CMPX_NGT_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NGT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NGT_F16 - - class Inst_VOPC__V_CMPX_NLE_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NLE_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NLE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NLE_F16 - - class Inst_VOPC__V_CMPX_NEQ_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NEQ_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NEQ_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NEQ_F16 - - 
class Inst_VOPC__V_CMPX_NLT_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NLT_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NLT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NLT_F16 - - class Inst_VOPC__V_CMPX_TRU_F16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_TRU_F16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_TRU_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_TRU_F16 - - class Inst_VOPC__V_CMP_F_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_F_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_F_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) 
override; - }; // Inst_VOPC__V_CMP_F_F32 - - class Inst_VOPC__V_CMP_LT_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LT_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LT_F32 - - class Inst_VOPC__V_CMP_EQ_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_EQ_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_EQ_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_EQ_F32 - - class Inst_VOPC__V_CMP_LE_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LE_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - 
void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LE_F32 - - class Inst_VOPC__V_CMP_GT_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GT_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GT_F32 - - class Inst_VOPC__V_CMP_LG_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LG_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LG_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LG_F32 - - class Inst_VOPC__V_CMP_GE_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GE_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - 
} - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GE_F32 - - class Inst_VOPC__V_CMP_O_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_O_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_O_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_O_F32 - - class Inst_VOPC__V_CMP_U_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_U_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_U_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_U_F32 - - class Inst_VOPC__V_CMP_NGE_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NGE_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NGE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", 
opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NGE_F32 - - class Inst_VOPC__V_CMP_NLG_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NLG_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NLG_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NLG_F32 - - class Inst_VOPC__V_CMP_NGT_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NGT_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NGT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NGT_F32 - - class Inst_VOPC__V_CMP_NLE_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NLE_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NLE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - 
default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NLE_F32 - - class Inst_VOPC__V_CMP_NEQ_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NEQ_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NEQ_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NEQ_F32 - - class Inst_VOPC__V_CMP_NLT_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NLT_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NLT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NLT_F32 - - class Inst_VOPC__V_CMP_TRU_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_TRU_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_TRU_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - 
return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_TRU_F32 - - class Inst_VOPC__V_CMPX_F_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_F_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_F_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_F_F32 - - class Inst_VOPC__V_CMPX_LT_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LT_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LT_F32 - - class Inst_VOPC__V_CMPX_EQ_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_EQ_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_EQ_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: 
//src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_EQ_F32 - - class Inst_VOPC__V_CMPX_LE_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LE_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LE_F32 - - class Inst_VOPC__V_CMPX_GT_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GT_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GT_F32 - - class Inst_VOPC__V_CMPX_LG_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LG_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LG_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) 
override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LG_F32 - - class Inst_VOPC__V_CMPX_GE_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GE_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GE_F32 - - class Inst_VOPC__V_CMPX_O_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_O_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_O_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_O_F32 - - class Inst_VOPC__V_CMPX_U_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_U_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_U_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - 
- int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_U_F32 - - class Inst_VOPC__V_CMPX_NGE_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NGE_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NGE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NGE_F32 - - class Inst_VOPC__V_CMPX_NLG_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NLG_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NLG_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NLG_F32 - - class Inst_VOPC__V_CMPX_NGT_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NGT_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NGT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } 
- int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NGT_F32 - - class Inst_VOPC__V_CMPX_NLE_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NLE_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NLE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NLE_F32 - - class Inst_VOPC__V_CMPX_NEQ_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NEQ_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NEQ_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NEQ_F32 - - class Inst_VOPC__V_CMPX_NLT_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NLT_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NLT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // 
getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NLT_F32 - - class Inst_VOPC__V_CMPX_TRU_F32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_TRU_F32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_TRU_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_TRU_F32 - - class Inst_VOPC__V_CMP_F_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_F_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_F_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_F_F64 - - class Inst_VOPC__V_CMP_LT_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LT_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LT_F64(); - - int - getNumOperands() override - { - return 
numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LT_F64 - - class Inst_VOPC__V_CMP_EQ_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_EQ_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_EQ_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_EQ_F64 - - class Inst_VOPC__V_CMP_LE_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LE_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LE_F64 - - class Inst_VOPC__V_CMP_GT_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GT_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GT_F64(); - - int - 
getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GT_F64 - - class Inst_VOPC__V_CMP_LG_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LG_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LG_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LG_F64 - - class Inst_VOPC__V_CMP_GE_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GE_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GE_F64 - - class Inst_VOPC__V_CMP_O_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_O_F64(InFmt_VOPC*); - 
~Inst_VOPC__V_CMP_O_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_O_F64 - - class Inst_VOPC__V_CMP_U_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_U_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_U_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_U_F64 - - class Inst_VOPC__V_CMP_NGE_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NGE_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NGE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NGE_F64 - - class Inst_VOPC__V_CMP_NLG_F64 : public Inst_VOPC - { - public: - 
Inst_VOPC__V_CMP_NLG_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NLG_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NLG_F64 - - class Inst_VOPC__V_CMP_NGT_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NGT_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NGT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NGT_F64 - - class Inst_VOPC__V_CMP_NLE_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NLE_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NLE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NLE_F64 - - class 
Inst_VOPC__V_CMP_NEQ_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NEQ_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NEQ_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NEQ_F64 - - class Inst_VOPC__V_CMP_NLT_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NLT_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NLT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NLT_F64 - - class Inst_VOPC__V_CMP_TRU_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_TRU_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_TRU_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - 
}; // Inst_VOPC__V_CMP_TRU_F64 - - class Inst_VOPC__V_CMPX_F_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_F_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_F_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_F_F64 - - class Inst_VOPC__V_CMPX_LT_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LT_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LT_F64 - - class Inst_VOPC__V_CMPX_EQ_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_EQ_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_EQ_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - 
void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_EQ_F64 - - class Inst_VOPC__V_CMPX_LE_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LE_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LE_F64 - - class Inst_VOPC__V_CMPX_GT_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GT_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GT_F64 - - class Inst_VOPC__V_CMPX_LG_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LG_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LG_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - 
return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LG_F64 - - class Inst_VOPC__V_CMPX_GE_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GE_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GE_F64 - - class Inst_VOPC__V_CMPX_O_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_O_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_O_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_O_F64 - - class Inst_VOPC__V_CMPX_U_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_U_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_U_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op 
idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_U_F64 - - class Inst_VOPC__V_CMPX_NGE_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NGE_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NGE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NGE_F64 - - class Inst_VOPC__V_CMPX_NLG_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NLG_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NLG_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NLG_F64 - - class Inst_VOPC__V_CMPX_NGT_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NGT_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NGT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; 
- case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NGT_F64 - - class Inst_VOPC__V_CMPX_NLE_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NLE_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NLE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NLE_F64 - - class Inst_VOPC__V_CMPX_NEQ_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NEQ_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NEQ_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NEQ_F64 - - class Inst_VOPC__V_CMPX_NLT_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NLT_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NLT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 
0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NLT_F64 - - class Inst_VOPC__V_CMPX_TRU_F64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_TRU_F64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_TRU_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_TRU_F64 - - class Inst_VOPC__V_CMP_F_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_F_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_F_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_F_I16 - - class Inst_VOPC__V_CMP_LT_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LT_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LT_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) 
override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LT_I16 - - class Inst_VOPC__V_CMP_EQ_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_EQ_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_EQ_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_EQ_I16 - - class Inst_VOPC__V_CMP_LE_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LE_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LE_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LE_I16 - - class Inst_VOPC__V_CMP_GT_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GT_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GT_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int 
- getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GT_I16 - - class Inst_VOPC__V_CMP_NE_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NE_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NE_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NE_I16 - - class Inst_VOPC__V_CMP_GE_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GE_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GE_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GE_I16 - - class Inst_VOPC__V_CMP_T_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_T_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_T_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() 
override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_T_I16 - - class Inst_VOPC__V_CMP_F_U16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_F_U16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_F_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_F_U16 - - class Inst_VOPC__V_CMP_LT_U16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LT_U16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LT_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LT_U16 - - class Inst_VOPC__V_CMP_EQ_U16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_EQ_U16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_EQ_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - 
int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_EQ_U16 - - class Inst_VOPC__V_CMP_LE_U16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LE_U16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LE_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LE_U16 - - class Inst_VOPC__V_CMP_GT_U16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GT_U16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GT_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GT_U16 - - class Inst_VOPC__V_CMP_NE_U16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NE_U16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NE_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int 
numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NE_U16 - - class Inst_VOPC__V_CMP_GE_U16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GE_U16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GE_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GE_U16 - - class Inst_VOPC__V_CMP_T_U16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_T_U16(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_T_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_T_U16 - - class Inst_VOPC__V_CMPX_F_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_F_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_F_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } 
// getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_F_I16 - - class Inst_VOPC__V_CMPX_LT_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LT_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LT_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LT_I16 - - class Inst_VOPC__V_CMPX_EQ_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_EQ_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_EQ_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_EQ_I16 - - class Inst_VOPC__V_CMPX_LE_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LE_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LE_I16(); - - int - getNumOperands() override - { - return 
numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LE_I16 - - class Inst_VOPC__V_CMPX_GT_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GT_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GT_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GT_I16 - - class Inst_VOPC__V_CMPX_NE_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NE_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NE_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NE_I16 - - class Inst_VOPC__V_CMPX_GE_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GE_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GE_I16(); - - int 
- getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GE_I16 - - class Inst_VOPC__V_CMPX_T_I16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_T_I16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_T_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_T_I16 - - class Inst_VOPC__V_CMPX_F_U16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_F_U16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_F_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_F_U16 - - class Inst_VOPC__V_CMPX_LT_U16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LT_U16(InFmt_VOPC*); - 
~Inst_VOPC__V_CMPX_LT_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LT_U16 - - class Inst_VOPC__V_CMPX_EQ_U16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_EQ_U16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_EQ_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_EQ_U16 - - class Inst_VOPC__V_CMPX_LE_U16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LE_U16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LE_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LE_U16 - - class Inst_VOPC__V_CMPX_GT_U16 : public Inst_VOPC - { - public: - 
Inst_VOPC__V_CMPX_GT_U16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GT_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GT_U16 - - class Inst_VOPC__V_CMPX_NE_U16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NE_U16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NE_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NE_U16 - - class Inst_VOPC__V_CMPX_GE_U16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GE_U16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GE_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GE_U16 - - class 
Inst_VOPC__V_CMPX_T_U16 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_T_U16(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_T_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_T_U16 - - class Inst_VOPC__V_CMP_F_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_F_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_F_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_F_I32 - - class Inst_VOPC__V_CMP_LT_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LT_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LT_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // 
Inst_VOPC__V_CMP_LT_I32 - - class Inst_VOPC__V_CMP_EQ_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_EQ_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_EQ_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_EQ_I32 - - class Inst_VOPC__V_CMP_LE_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LE_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LE_I32 - - class Inst_VOPC__V_CMP_GT_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GT_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GT_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void 
execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GT_I32 - - class Inst_VOPC__V_CMP_NE_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NE_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NE_I32 - - class Inst_VOPC__V_CMP_GE_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GE_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GE_I32 - - class Inst_VOPC__V_CMP_T_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_T_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_T_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } 
// getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_T_I32 - - class Inst_VOPC__V_CMP_F_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_F_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_F_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_F_U32 - - class Inst_VOPC__V_CMP_LT_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LT_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LT_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LT_U32 - - class Inst_VOPC__V_CMP_EQ_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_EQ_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_EQ_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", 
opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_EQ_U32 - - class Inst_VOPC__V_CMP_LE_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LE_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LE_U32 - - class Inst_VOPC__V_CMP_GT_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GT_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GT_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GT_U32 - - class Inst_VOPC__V_CMP_NE_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NE_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - 
fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NE_U32 - - class Inst_VOPC__V_CMP_GE_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GE_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GE_U32 - - class Inst_VOPC__V_CMP_T_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_T_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_T_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_T_U32 - - class Inst_VOPC__V_CMPX_F_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_F_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_F_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc 
- return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_F_I32 - - class Inst_VOPC__V_CMPX_LT_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LT_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LT_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LT_I32 - - class Inst_VOPC__V_CMPX_EQ_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_EQ_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_EQ_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_EQ_I32 - - class Inst_VOPC__V_CMPX_LE_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LE_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 
1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LE_I32 - - class Inst_VOPC__V_CMPX_GT_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GT_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GT_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GT_I32 - - class Inst_VOPC__V_CMPX_NE_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NE_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NE_I32 - - class Inst_VOPC__V_CMPX_GE_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GE_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch 
(opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GE_I32 - - class Inst_VOPC__V_CMPX_T_I32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_T_I32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_T_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_T_I32 - - class Inst_VOPC__V_CMPX_F_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_F_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_F_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_F_U32 - - class Inst_VOPC__V_CMPX_LT_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LT_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LT_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - 
getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LT_U32 - - class Inst_VOPC__V_CMPX_EQ_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_EQ_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_EQ_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_EQ_U32 - - class Inst_VOPC__V_CMPX_LE_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LE_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LE_U32 - - class Inst_VOPC__V_CMPX_GT_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GT_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GT_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int 
numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GT_U32 - - class Inst_VOPC__V_CMPX_NE_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NE_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NE_U32 - - class Inst_VOPC__V_CMPX_GE_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GE_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GE_U32 - - class Inst_VOPC__V_CMPX_T_U32 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_T_U32(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_T_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int 
numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_T_U32 - - class Inst_VOPC__V_CMP_F_I64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_F_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_F_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_F_I64 - - class Inst_VOPC__V_CMP_LT_I64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LT_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LT_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LT_I64 - - class Inst_VOPC__V_CMP_EQ_I64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_EQ_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_EQ_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } 
// getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_EQ_I64 - - class Inst_VOPC__V_CMP_LE_I64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LE_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LE_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LE_I64 - - class Inst_VOPC__V_CMP_GT_I64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GT_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GT_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GT_I64 - - class Inst_VOPC__V_CMP_NE_I64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NE_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NE_I64(); - - int - getNumOperands() override - { - return 
numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NE_I64 - - class Inst_VOPC__V_CMP_GE_I64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GE_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GE_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GE_I64 - - class Inst_VOPC__V_CMP_T_I64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_T_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_T_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_T_I64 - - class Inst_VOPC__V_CMP_F_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_F_U64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_F_U64(); - - int - getNumOperands() 
override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_F_U64 - - class Inst_VOPC__V_CMP_LT_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LT_U64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LT_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LT_U64 - - class Inst_VOPC__V_CMP_EQ_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_EQ_U64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_EQ_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_EQ_U64 - - class Inst_VOPC__V_CMP_LE_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_LE_U64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_LE_U64(); 
- - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_LE_U64 - - class Inst_VOPC__V_CMP_GT_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GT_U64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_GT_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GT_U64 - - class Inst_VOPC__V_CMP_NE_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_NE_U64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_NE_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_NE_U64 - - class Inst_VOPC__V_CMP_GE_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_GE_U64(InFmt_VOPC*); - 
~Inst_VOPC__V_CMP_GE_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_GE_U64 - - class Inst_VOPC__V_CMP_T_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMP_T_U64(InFmt_VOPC*); - ~Inst_VOPC__V_CMP_T_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMP_T_U64 - - class Inst_VOPC__V_CMPX_F_I64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_F_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_F_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_F_I64 - - class Inst_VOPC__V_CMPX_LT_I64 : public Inst_VOPC - { - public: - 
Inst_VOPC__V_CMPX_LT_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LT_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LT_I64 - - class Inst_VOPC__V_CMPX_EQ_I64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_EQ_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_EQ_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_EQ_I64 - - class Inst_VOPC__V_CMPX_LE_I64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LE_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LE_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LE_I64 - - class 
Inst_VOPC__V_CMPX_GT_I64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GT_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GT_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GT_I64 - - class Inst_VOPC__V_CMPX_NE_I64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NE_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NE_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NE_I64 - - class Inst_VOPC__V_CMPX_GE_I64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GE_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GE_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - 
}; // Inst_VOPC__V_CMPX_GE_I64 - - class Inst_VOPC__V_CMPX_T_I64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_T_I64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_T_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_T_I64 - - class Inst_VOPC__V_CMPX_F_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_F_U64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_F_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_F_U64 - - class Inst_VOPC__V_CMPX_LT_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LT_U64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LT_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void 
execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LT_U64 - - class Inst_VOPC__V_CMPX_EQ_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_EQ_U64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_EQ_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_EQ_U64 - - class Inst_VOPC__V_CMPX_LE_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_LE_U64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_LE_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_LE_U64 - - class Inst_VOPC__V_CMPX_GT_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GT_U64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GT_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - 
return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GT_U64 - - class Inst_VOPC__V_CMPX_NE_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_NE_U64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_NE_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_NE_U64 - - class Inst_VOPC__V_CMPX_GE_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_GE_U64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_GE_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_GE_U64 - - class Inst_VOPC__V_CMPX_T_U64 : public Inst_VOPC - { - public: - Inst_VOPC__V_CMPX_T_U64(InFmt_VOPC*); - ~Inst_VOPC__V_CMPX_T_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vcc - return 8; - default: - 
fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOPC__V_CMPX_T_U64 - - class Inst_VINTRP__V_INTERP_P1_F32 : public Inst_VINTRP - { - public: - Inst_VINTRP__V_INTERP_P1_F32(InFmt_VINTRP*); - ~Inst_VINTRP__V_INTERP_P1_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_ij - return 4; - case 1: //attr - return 16; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VINTRP__V_INTERP_P1_F32 - - class Inst_VINTRP__V_INTERP_P2_F32 : public Inst_VINTRP - { - public: - Inst_VINTRP__V_INTERP_P2_F32(InFmt_VINTRP*); - ~Inst_VINTRP__V_INTERP_P2_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_ij - return 4; - case 1: //attr - return 16; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VINTRP__V_INTERP_P2_F32 - - class Inst_VINTRP__V_INTERP_MOV_F32 : public Inst_VINTRP - { - public: - Inst_VINTRP__V_INTERP_MOV_F32(InFmt_VINTRP*); - ~Inst_VINTRP__V_INTERP_MOV_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - 
switch (opIdx) { - case 0: //param - return 4; - case 1: //attr - return 16; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VINTRP__V_INTERP_MOV_F32 - - class Inst_VOP3__V_CMP_CLASS_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_CLASS_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_CLASS_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_CLASS_F32 - - class Inst_VOP3__V_CMPX_CLASS_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_CLASS_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_CLASS_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_CLASS_F32 - - class Inst_VOP3__V_CMP_CLASS_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_CLASS_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_CLASS_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int 
numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_CLASS_F64 - - class Inst_VOP3__V_CMPX_CLASS_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_CLASS_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_CLASS_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_CLASS_F64 - - class Inst_VOP3__V_CMP_CLASS_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_CLASS_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_CLASS_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_CLASS_F16 - - class Inst_VOP3__V_CMPX_CLASS_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_CLASS_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_CLASS_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } 
// getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_CLASS_F16 - - class Inst_VOP3__V_CMP_F_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_F_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_F_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_F_F16 - - class Inst_VOP3__V_CMP_LT_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LT_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LT_F16 - - class Inst_VOP3__V_CMP_EQ_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_EQ_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_EQ_F16(); - - int - getNumOperands() override - { - return 
numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_EQ_F16 - - class Inst_VOP3__V_CMP_LE_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LE_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LE_F16 - - class Inst_VOP3__V_CMP_GT_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GT_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GT_F16 - - class Inst_VOP3__V_CMP_LG_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LG_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LG_F16(); - - int - 
getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LG_F16 - - class Inst_VOP3__V_CMP_GE_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GE_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GE_F16 - - class Inst_VOP3__V_CMP_O_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_O_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_O_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_O_F16 - - class Inst_VOP3__V_CMP_U_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_U_F16(InFmt_VOP3*); - 
~Inst_VOP3__V_CMP_U_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_U_F16 - - class Inst_VOP3__V_CMP_NGE_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NGE_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NGE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NGE_F16 - - class Inst_VOP3__V_CMP_NLG_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NLG_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NLG_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NLG_F16 - - class Inst_VOP3__V_CMP_NGT_F16 : public Inst_VOP3 - { - public: - 
Inst_VOP3__V_CMP_NGT_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NGT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NGT_F16 - - class Inst_VOP3__V_CMP_NLE_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NLE_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NLE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NLE_F16 - - class Inst_VOP3__V_CMP_NEQ_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NEQ_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NEQ_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NEQ_F16 - - class 
Inst_VOP3__V_CMP_NLT_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NLT_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NLT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NLT_F16 - - class Inst_VOP3__V_CMP_TRU_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_TRU_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_TRU_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_TRU_F16 - - class Inst_VOP3__V_CMPX_F_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_F_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_F_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - 
}; // Inst_VOP3__V_CMPX_F_F16 - - class Inst_VOP3__V_CMPX_LT_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LT_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LT_F16 - - class Inst_VOP3__V_CMPX_EQ_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_EQ_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_EQ_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_EQ_F16 - - class Inst_VOP3__V_CMPX_LE_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LE_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - 
- void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LE_F16 - - class Inst_VOP3__V_CMPX_GT_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GT_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GT_F16 - - class Inst_VOP3__V_CMPX_LG_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LG_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LG_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LG_F16 - - class Inst_VOP3__V_CMPX_GE_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GE_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", 
opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GE_F16 - - class Inst_VOP3__V_CMPX_O_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_O_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_O_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_O_F16 - - class Inst_VOP3__V_CMPX_U_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_U_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_U_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_U_F16 - - class Inst_VOP3__V_CMPX_NGE_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NGE_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NGE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - 
default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NGE_F16 - - class Inst_VOP3__V_CMPX_NLG_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NLG_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NLG_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NLG_F16 - - class Inst_VOP3__V_CMPX_NGT_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NGT_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NGT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NGT_F16 - - class Inst_VOP3__V_CMPX_NLE_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NLE_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NLE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - 
case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NLE_F16 - - class Inst_VOP3__V_CMPX_NEQ_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NEQ_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NEQ_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NEQ_F16 - - class Inst_VOP3__V_CMPX_NLT_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NLT_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NLT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NLT_F16 - - class Inst_VOP3__V_CMPX_TRU_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_TRU_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_TRU_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override 
- { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_TRU_F16 - - class Inst_VOP3__V_CMP_F_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_F_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_F_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_F_F32 - - class Inst_VOP3__V_CMP_LT_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LT_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LT_F32 - - class Inst_VOP3__V_CMP_EQ_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_EQ_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_EQ_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - 
getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_EQ_F32 - - class Inst_VOP3__V_CMP_LE_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LE_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LE_F32 - - class Inst_VOP3__V_CMP_GT_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GT_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GT_F32 - - class Inst_VOP3__V_CMP_LG_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LG_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LG_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() 
override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LG_F32 - - class Inst_VOP3__V_CMP_GE_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GE_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GE_F32 - - class Inst_VOP3__V_CMP_O_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_O_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_O_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_O_F32 - - class Inst_VOP3__V_CMP_U_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_U_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_U_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - 
int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_U_F32 - - class Inst_VOP3__V_CMP_NGE_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NGE_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NGE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NGE_F32 - - class Inst_VOP3__V_CMP_NLG_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NLG_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NLG_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NLG_F32 - - class Inst_VOP3__V_CMP_NGT_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NGT_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NGT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int 
numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NGT_F32 - - class Inst_VOP3__V_CMP_NLE_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NLE_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NLE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NLE_F32 - - class Inst_VOP3__V_CMP_NEQ_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NEQ_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NEQ_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NEQ_F32 - - class Inst_VOP3__V_CMP_NLT_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NLT_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NLT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + 
numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NLT_F32 - - class Inst_VOP3__V_CMP_TRU_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_TRU_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_TRU_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_TRU_F32 - - class Inst_VOP3__V_CMPX_F_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_F_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_F_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_F_F32 - - class Inst_VOP3__V_CMPX_LT_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LT_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LT_F32(); - - int - getNumOperands() 
override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LT_F32 - - class Inst_VOP3__V_CMPX_EQ_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_EQ_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_EQ_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_EQ_F32 - - class Inst_VOP3__V_CMPX_LE_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LE_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LE_F32 - - class Inst_VOP3__V_CMPX_GT_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GT_F32(InFmt_VOP3*); - 
~Inst_VOP3__V_CMPX_GT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GT_F32 - - class Inst_VOP3__V_CMPX_LG_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LG_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LG_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LG_F32 - - class Inst_VOP3__V_CMPX_GE_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GE_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GE_F32 - - class Inst_VOP3__V_CMPX_O_F32 : public Inst_VOP3 - { - public: 
- Inst_VOP3__V_CMPX_O_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_O_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_O_F32 - - class Inst_VOP3__V_CMPX_U_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_U_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_U_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_U_F32 - - class Inst_VOP3__V_CMPX_NGE_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NGE_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NGE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NGE_F32 - - class 
Inst_VOP3__V_CMPX_NLG_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NLG_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NLG_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NLG_F32 - - class Inst_VOP3__V_CMPX_NGT_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NGT_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NGT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NGT_F32 - - class Inst_VOP3__V_CMPX_NLE_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NLE_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NLE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void 
execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NLE_F32 - - class Inst_VOP3__V_CMPX_NEQ_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NEQ_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NEQ_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NEQ_F32 - - class Inst_VOP3__V_CMPX_NLT_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NLT_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NLT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NLT_F32 - - class Inst_VOP3__V_CMPX_TRU_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_TRU_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_TRU_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", 
opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_TRU_F32 - - class Inst_VOP3__V_CMP_F_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_F_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_F_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_F_F64 - - class Inst_VOP3__V_CMP_LT_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LT_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LT_F64 - - class Inst_VOP3__V_CMP_EQ_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_EQ_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_EQ_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - 
fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_EQ_F64 - - class Inst_VOP3__V_CMP_LE_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LE_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LE_F64 - - class Inst_VOP3__V_CMP_GT_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GT_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GT_F64 - - class Inst_VOP3__V_CMP_LG_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LG_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LG_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: 
//sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LG_F64 - - class Inst_VOP3__V_CMP_GE_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GE_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GE_F64 - - class Inst_VOP3__V_CMP_O_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_O_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_O_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_O_F64 - - class Inst_VOP3__V_CMP_U_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_U_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_U_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: 
//src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_U_F64 - - class Inst_VOP3__V_CMP_NGE_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NGE_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NGE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NGE_F64 - - class Inst_VOP3__V_CMP_NLG_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NLG_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NLG_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NLG_F64 - - class Inst_VOP3__V_CMP_NGT_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NGT_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NGT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) 
{ - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NGT_F64 - - class Inst_VOP3__V_CMP_NLE_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NLE_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NLE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NLE_F64 - - class Inst_VOP3__V_CMP_NEQ_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NEQ_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NEQ_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NEQ_F64 - - class Inst_VOP3__V_CMP_NLT_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NLT_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NLT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - 
getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NLT_F64 - - class Inst_VOP3__V_CMP_TRU_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_TRU_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_TRU_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_TRU_F64 - - class Inst_VOP3__V_CMPX_F_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_F_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_F_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_F_F64 - - class Inst_VOP3__V_CMPX_LT_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LT_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int 
numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LT_F64 - - class Inst_VOP3__V_CMPX_EQ_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_EQ_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_EQ_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_EQ_F64 - - class Inst_VOP3__V_CMPX_LE_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LE_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LE_F64 - - class Inst_VOP3__V_CMPX_GT_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GT_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int 
numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GT_F64 - - class Inst_VOP3__V_CMPX_LG_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LG_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LG_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LG_F64 - - class Inst_VOP3__V_CMPX_GE_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GE_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GE_F64 - - class Inst_VOP3__V_CMPX_O_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_O_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_O_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + 
numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_O_F64 - - class Inst_VOP3__V_CMPX_U_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_U_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_U_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_U_F64 - - class Inst_VOP3__V_CMPX_NGE_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NGE_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NGE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NGE_F64 - - class Inst_VOP3__V_CMPX_NLG_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NLG_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NLG_F64(); - - int - 
getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NLG_F64 - - class Inst_VOP3__V_CMPX_NGT_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NGT_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NGT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NGT_F64 - - class Inst_VOP3__V_CMPX_NLE_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NLE_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NLE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NLE_F64 - - class Inst_VOP3__V_CMPX_NEQ_F64 : public Inst_VOP3 - { - public: - 
Inst_VOP3__V_CMPX_NEQ_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NEQ_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NEQ_F64 - - class Inst_VOP3__V_CMPX_NLT_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NLT_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NLT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NLT_F64 - - class Inst_VOP3__V_CMPX_TRU_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_TRU_F64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_TRU_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_TRU_F64 - - class 
Inst_VOP3__V_CMP_F_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_F_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_F_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_F_I16 - - class Inst_VOP3__V_CMP_LT_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LT_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LT_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LT_I16 - - class Inst_VOP3__V_CMP_EQ_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_EQ_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_EQ_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // 
Inst_VOP3__V_CMP_EQ_I16 - - class Inst_VOP3__V_CMP_LE_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LE_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LE_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LE_I16 - - class Inst_VOP3__V_CMP_GT_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GT_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GT_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GT_I16 - - class Inst_VOP3__V_CMP_NE_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NE_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NE_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void 
execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NE_I16 - - class Inst_VOP3__V_CMP_GE_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GE_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GE_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GE_I16 - - class Inst_VOP3__V_CMP_T_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_T_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_T_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_T_I16 - - class Inst_VOP3__V_CMP_F_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_F_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_F_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // 
getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_F_U16 - - class Inst_VOP3__V_CMP_LT_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LT_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LT_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LT_U16 - - class Inst_VOP3__V_CMP_EQ_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_EQ_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_EQ_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_EQ_U16 - - class Inst_VOP3__V_CMP_LE_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LE_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LE_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", 
opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LE_U16 - - class Inst_VOP3__V_CMP_GT_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GT_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GT_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GT_U16 - - class Inst_VOP3__V_CMP_NE_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NE_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NE_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NE_U16 - - class Inst_VOP3__V_CMP_GE_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GE_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GE_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - 
fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GE_U16 - - class Inst_VOP3__V_CMP_T_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_T_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_T_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_T_U16 - - class Inst_VOP3__V_CMPX_F_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_F_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_F_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_F_I16 - - class Inst_VOP3__V_CMPX_LT_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LT_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LT_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: 
//sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LT_I16 - - class Inst_VOP3__V_CMPX_EQ_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_EQ_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_EQ_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_EQ_I16 - - class Inst_VOP3__V_CMPX_LE_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LE_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LE_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LE_I16 - - class Inst_VOP3__V_CMPX_GT_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GT_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GT_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 
2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GT_I16 - - class Inst_VOP3__V_CMPX_NE_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NE_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NE_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NE_I16 - - class Inst_VOP3__V_CMPX_GE_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GE_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GE_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GE_I16 - - class Inst_VOP3__V_CMPX_T_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_T_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_T_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - 
switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_T_I16 - - class Inst_VOP3__V_CMPX_F_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_F_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_F_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_F_U16 - - class Inst_VOP3__V_CMPX_LT_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LT_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LT_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LT_U16 - - class Inst_VOP3__V_CMPX_EQ_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_EQ_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_EQ_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - 
getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_EQ_U16 - - class Inst_VOP3__V_CMPX_LE_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LE_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LE_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LE_U16 - - class Inst_VOP3__V_CMPX_GT_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GT_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GT_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GT_U16 - - class Inst_VOP3__V_CMPX_NE_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NE_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NE_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int 
numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NE_U16 - - class Inst_VOP3__V_CMPX_GE_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GE_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GE_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GE_U16 - - class Inst_VOP3__V_CMPX_T_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_T_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_T_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_T_U16 - - class Inst_VOP3__V_CMP_F_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_F_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_F_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int 
numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_F_I32 - - class Inst_VOP3__V_CMP_LT_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LT_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LT_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LT_I32 - - class Inst_VOP3__V_CMP_EQ_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_EQ_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_EQ_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_EQ_I32 - - class Inst_VOP3__V_CMP_LE_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LE_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); 
- } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LE_I32 - - class Inst_VOP3__V_CMP_GT_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GT_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GT_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GT_I32 - - class Inst_VOP3__V_CMP_NE_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NE_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NE_I32 - - class Inst_VOP3__V_CMP_GE_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GE_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GE_I32(); - - int - getNumOperands() override - { - return 
numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GE_I32 - - class Inst_VOP3__V_CMP_T_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_T_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_T_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_T_I32 - - class Inst_VOP3__V_CMP_F_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_F_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_F_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_F_U32 - - class Inst_VOP3__V_CMP_LT_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LT_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LT_U32(); - - int - 
getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LT_U32 - - class Inst_VOP3__V_CMP_EQ_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_EQ_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_EQ_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_EQ_U32 - - class Inst_VOP3__V_CMP_LE_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LE_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LE_U32 - - class Inst_VOP3__V_CMP_GT_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GT_U32(InFmt_VOP3*); - 
~Inst_VOP3__V_CMP_GT_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GT_U32 - - class Inst_VOP3__V_CMP_NE_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NE_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NE_U32 - - class Inst_VOP3__V_CMP_GE_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GE_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GE_U32 - - class Inst_VOP3__V_CMP_T_U32 : public Inst_VOP3 - { - public: - 
Inst_VOP3__V_CMP_T_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_T_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_T_U32 - - class Inst_VOP3__V_CMPX_F_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_F_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_F_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_F_I32 - - class Inst_VOP3__V_CMPX_LT_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LT_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LT_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LT_I32 - - class Inst_VOP3__V_CMPX_EQ_I32 : 
public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_EQ_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_EQ_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_EQ_I32 - - class Inst_VOP3__V_CMPX_LE_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LE_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LE_I32 - - class Inst_VOP3__V_CMPX_GT_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GT_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GT_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // 
Inst_VOP3__V_CMPX_GT_I32 - - class Inst_VOP3__V_CMPX_NE_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NE_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NE_I32 - - class Inst_VOP3__V_CMPX_GE_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GE_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GE_I32 - - class Inst_VOP3__V_CMPX_T_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_T_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_T_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void 
execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_T_I32 - - class Inst_VOP3__V_CMPX_F_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_F_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_F_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_F_U32 - - class Inst_VOP3__V_CMPX_LT_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LT_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LT_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LT_U32 - - class Inst_VOP3__V_CMPX_EQ_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_EQ_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_EQ_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return 
-1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_EQ_U32 - - class Inst_VOP3__V_CMPX_LE_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LE_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LE_U32 - - class Inst_VOP3__V_CMPX_GT_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GT_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GT_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GT_U32 - - class Inst_VOP3__V_CMPX_NE_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NE_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - 
fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NE_U32 - - class Inst_VOP3__V_CMPX_GE_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GE_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GE_U32 - - class Inst_VOP3__V_CMPX_T_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_T_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_T_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_T_U32 - - class Inst_VOP3__V_CMP_F_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_F_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_F_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 
2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_F_I64 - - class Inst_VOP3__V_CMP_LT_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LT_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LT_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LT_I64 - - class Inst_VOP3__V_CMP_EQ_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_EQ_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_EQ_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_EQ_I64 - - class Inst_VOP3__V_CMP_LE_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LE_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LE_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 
1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LE_I64 - - class Inst_VOP3__V_CMP_GT_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GT_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GT_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GT_I64 - - class Inst_VOP3__V_CMP_NE_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NE_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NE_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NE_I64 - - class Inst_VOP3__V_CMP_GE_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GE_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GE_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - 
case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GE_I64 - - class Inst_VOP3__V_CMP_T_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_T_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_T_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_T_I64 - - class Inst_VOP3__V_CMP_F_U64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_F_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_F_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_F_U64 - - class Inst_VOP3__V_CMP_LT_U64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LT_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LT_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - 
{ - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LT_U64 - - class Inst_VOP3__V_CMP_EQ_U64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_EQ_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_EQ_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_EQ_U64 - - class Inst_VOP3__V_CMP_LE_U64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_LE_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_LE_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_LE_U64 - - class Inst_VOP3__V_CMP_GT_U64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GT_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GT_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - 
getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GT_U64 - - class Inst_VOP3__V_CMP_NE_U64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_NE_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_NE_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_NE_U64 - - class Inst_VOP3__V_CMP_GE_U64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_GE_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_GE_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_GE_U64 - - class Inst_VOP3__V_CMP_T_U64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMP_T_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMP_T_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() 
override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMP_T_U64 - - class Inst_VOP3__V_CMPX_F_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_F_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_F_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_F_I64 - - class Inst_VOP3__V_CMPX_LT_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LT_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LT_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LT_I64 - - class Inst_VOP3__V_CMPX_EQ_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_EQ_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_EQ_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { 
return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_EQ_I64 - - class Inst_VOP3__V_CMPX_LE_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LE_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LE_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LE_I64 - - class Inst_VOP3__V_CMPX_GT_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GT_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GT_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GT_I64 - - class Inst_VOP3__V_CMPX_NE_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NE_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NE_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // 
getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NE_I64 - - class Inst_VOP3__V_CMPX_GE_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GE_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GE_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GE_I64 - - class Inst_VOP3__V_CMPX_T_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_T_I64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_T_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_T_I64 - - class Inst_VOP3__V_CMPX_F_U64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_F_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_F_U64(); - - int - getNumOperands() override - { - return 
numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_F_U64 - - class Inst_VOP3__V_CMPX_LT_U64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LT_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LT_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LT_U64 - - class Inst_VOP3__V_CMPX_EQ_U64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_EQ_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_EQ_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_EQ_U64 - - class Inst_VOP3__V_CMPX_LE_U64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_LE_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_LE_U64(); - - 
int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_LE_U64 - - class Inst_VOP3__V_CMPX_GT_U64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_GT_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GT_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GT_U64 - - class Inst_VOP3__V_CMPX_NE_U64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_NE_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_NE_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_NE_U64 - - class Inst_VOP3__V_CMPX_GE_U64 : public Inst_VOP3 - { - public: - 
Inst_VOP3__V_CMPX_GE_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_GE_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_GE_U64 - - class Inst_VOP3__V_CMPX_T_U64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CMPX_T_U64(InFmt_VOP3*); - ~Inst_VOP3__V_CMPX_T_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //sdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CMPX_T_U64 - - class Inst_VOP3__V_CNDMASK_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3*); - ~Inst_VOP3__V_CNDMASK_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //carryin - return 8; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // 
Inst_VOP3__V_CNDMASK_B32 - - class Inst_VOP3__V_ADD_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_ADD_F32(InFmt_VOP3*); - ~Inst_VOP3__V_ADD_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_ADD_F32 - - class Inst_VOP3__V_SUB_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_SUB_F32(InFmt_VOP3*); - ~Inst_VOP3__V_SUB_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SUB_F32 - - class Inst_VOP3__V_SUBREV_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_SUBREV_F32(InFmt_VOP3*); - ~Inst_VOP3__V_SUBREV_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; 
- }; // Inst_VOP3__V_SUBREV_F32 - - class Inst_VOP3__V_MUL_LEGACY_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3*); - ~Inst_VOP3__V_MUL_LEGACY_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MUL_LEGACY_F32 - - class Inst_VOP3__V_MUL_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MUL_F32(InFmt_VOP3*); - ~Inst_VOP3__V_MUL_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MUL_F32 - - class Inst_VOP3__V_MUL_I32_I24 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3*); - ~Inst_VOP3__V_MUL_I32_I24(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - 
void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MUL_I32_I24 - - class Inst_VOP3__V_MUL_HI_I32_I24 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3*); - ~Inst_VOP3__V_MUL_HI_I32_I24(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MUL_HI_I32_I24 - - class Inst_VOP3__V_MUL_U32_U24 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3*); - ~Inst_VOP3__V_MUL_U32_U24(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MUL_U32_U24 - - class Inst_VOP3__V_MUL_HI_U32_U24 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3*); - ~Inst_VOP3__V_MUL_HI_U32_U24(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out 
of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MUL_HI_U32_U24 - - class Inst_VOP3__V_MIN_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MIN_F32(InFmt_VOP3*); - ~Inst_VOP3__V_MIN_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MIN_F32 - - class Inst_VOP3__V_MAX_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAX_F32(InFmt_VOP3*); - ~Inst_VOP3__V_MAX_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAX_F32 - - class Inst_VOP3__V_MIN_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MIN_I32(InFmt_VOP3*); - ~Inst_VOP3__V_MIN_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i 
out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MIN_I32 - - class Inst_VOP3__V_MAX_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAX_I32(InFmt_VOP3*); - ~Inst_VOP3__V_MAX_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAX_I32 - - class Inst_VOP3__V_MIN_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MIN_U32(InFmt_VOP3*); - ~Inst_VOP3__V_MIN_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MIN_U32 - - class Inst_VOP3__V_MAX_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAX_U32(InFmt_VOP3*); - ~Inst_VOP3__V_MAX_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out 
of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAX_U32 - - class Inst_VOP3__V_LSHRREV_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3*); - ~Inst_VOP3__V_LSHRREV_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_LSHRREV_B32 - - class Inst_VOP3__V_ASHRREV_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3*); - ~Inst_VOP3__V_ASHRREV_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_ASHRREV_I32 - - class Inst_VOP3__V_LSHLREV_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3*); - ~Inst_VOP3__V_LSHLREV_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - 
return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_LSHLREV_B32 - - class Inst_VOP3__V_AND_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_AND_B32(InFmt_VOP3*); - ~Inst_VOP3__V_AND_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_AND_B32 - - class Inst_VOP3__V_OR_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_OR_B32(InFmt_VOP3*); - ~Inst_VOP3__V_OR_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_OR_B32 - - class Inst_VOP3__V_XOR_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_XOR_B32(InFmt_VOP3*); - ~Inst_VOP3__V_XOR_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - 
return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_XOR_B32 - - class Inst_VOP3__V_MAC_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAC_F32(InFmt_VOP3*); - ~Inst_VOP3__V_MAC_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAC_F32 - - class Inst_VOP3__V_ADD_U32 : public Inst_VOP3_SDST_ENC - { - public: - Inst_VOP3__V_ADD_U32(InFmt_VOP3_SDST_ENC*); - ~Inst_VOP3__V_ADD_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - case 3: //carryout - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_ADD_U32 - - class Inst_VOP3__V_SUB_U32 : public Inst_VOP3_SDST_ENC - { - public: - Inst_VOP3__V_SUB_U32(InFmt_VOP3_SDST_ENC*); - ~Inst_VOP3__V_SUB_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: 
//src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - case 3: //carryout - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SUB_U32 - - class Inst_VOP3__V_SUBREV_U32 : public Inst_VOP3_SDST_ENC - { - public: - Inst_VOP3__V_SUBREV_U32(InFmt_VOP3_SDST_ENC*); - ~Inst_VOP3__V_SUBREV_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - case 3: //carryout - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SUBREV_U32 - - class Inst_VOP3__V_ADDC_U32 : public Inst_VOP3_SDST_ENC - { - public: - Inst_VOP3__V_ADDC_U32(InFmt_VOP3_SDST_ENC*); - ~Inst_VOP3__V_ADDC_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //carryin - return 8; - case 3: //vdst - return 4; - case 4: //carryout - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_ADDC_U32 - - class Inst_VOP3__V_SUBB_U32 : public Inst_VOP3_SDST_ENC - { - public: - Inst_VOP3__V_SUBB_U32(InFmt_VOP3_SDST_ENC*); - ~Inst_VOP3__V_SUBB_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); 
- } // getNumOperands - - int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //carryin - return 8; - case 3: //vdst - return 4; - case 4: //carryout - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SUBB_U32 - - class Inst_VOP3__V_SUBBREV_U32 : public Inst_VOP3_SDST_ENC - { - public: - Inst_VOP3__V_SUBBREV_U32(InFmt_VOP3_SDST_ENC*); - ~Inst_VOP3__V_SUBBREV_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //carryin - return 8; - case 3: //vdst - return 4; - case 4: //carryout - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SUBBREV_U32 - - class Inst_VOP3__V_ADD_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_ADD_F16(InFmt_VOP3*); - ~Inst_VOP3__V_ADD_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_ADD_F16 - - class Inst_VOP3__V_SUB_F16 : public Inst_VOP3 - { - 
public: - Inst_VOP3__V_SUB_F16(InFmt_VOP3*); - ~Inst_VOP3__V_SUB_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SUB_F16 - - class Inst_VOP3__V_SUBREV_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_SUBREV_F16(InFmt_VOP3*); - ~Inst_VOP3__V_SUBREV_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SUBREV_F16 - - class Inst_VOP3__V_MUL_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MUL_F16(InFmt_VOP3*); - ~Inst_VOP3__V_MUL_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MUL_F16 - - class Inst_VOP3__V_MAC_F16 : public 
Inst_VOP3 - { - public: - Inst_VOP3__V_MAC_F16(InFmt_VOP3*); - ~Inst_VOP3__V_MAC_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAC_F16 - - class Inst_VOP3__V_ADD_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_ADD_U16(InFmt_VOP3*); - ~Inst_VOP3__V_ADD_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_ADD_U16 - - class Inst_VOP3__V_SUB_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_SUB_U16(InFmt_VOP3*); - ~Inst_VOP3__V_SUB_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SUB_U16 - - class Inst_VOP3__V_SUBREV_U16 : public 
Inst_VOP3 - { - public: - Inst_VOP3__V_SUBREV_U16(InFmt_VOP3*); - ~Inst_VOP3__V_SUBREV_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SUBREV_U16 - - class Inst_VOP3__V_MUL_LO_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3*); - ~Inst_VOP3__V_MUL_LO_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MUL_LO_U16 - - class Inst_VOP3__V_LSHLREV_B16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3*); - ~Inst_VOP3__V_LSHLREV_B16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_LSHLREV_B16 - - 
class Inst_VOP3__V_LSHRREV_B16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3*); - ~Inst_VOP3__V_LSHRREV_B16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_LSHRREV_B16 - - class Inst_VOP3__V_ASHRREV_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3*); - ~Inst_VOP3__V_ASHRREV_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_ASHRREV_I16 - - class Inst_VOP3__V_MAX_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAX_F16(InFmt_VOP3*); - ~Inst_VOP3__V_MAX_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; 
// Presumed 16-bit float minimum (per mnemonic). Operands: two 2-byte
// sources, one 2-byte vdst.
class Inst_VOP3__V_MIN_F16 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_MIN_F16(InFmt_VOP3*);
    ~Inst_VOP3__V_MIN_F16();

    // Total operand count = destination + source register operands.
    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 2; }

    // Byte size of operand opIdx (0/1 = sources, 2 = vdst); fatal()
    // on any other index.
    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src_0
            return 2;
          case 1: //src_1
            return 2;
          case 2: //vdst
            return 2;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_MIN_F16

// Presumed 16-bit unsigned maximum (per mnemonic). Operands: two
// 2-byte sources, one 2-byte vdst.
class Inst_VOP3__V_MAX_U16 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_MAX_U16(InFmt_VOP3*);
    ~Inst_VOP3__V_MAX_U16();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 2; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src_0
            return 2;
          case 1: //src_1
            return 2;
          case 2: //vdst
            return 2;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_MAX_U16

// Presumed 16-bit signed maximum (per mnemonic). Operands: two 2-byte
// sources, one 2-byte vdst.
class Inst_VOP3__V_MAX_I16 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_MAX_I16(InFmt_VOP3*);
    ~Inst_VOP3__V_MAX_I16();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 2; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src_0
            return 2;
          case 1: //src_1
            return 2;
          case 2: //vdst
            return 2;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_MAX_I16

// Presumed 16-bit unsigned minimum (per mnemonic). Operands: two
// 2-byte sources, one 2-byte vdst.
class Inst_VOP3__V_MIN_U16 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_MIN_U16(InFmt_VOP3*);
    ~Inst_VOP3__V_MIN_U16();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 2; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src_0
            return 2;
          case 1: //src_1
            return 2;
          case 2: //vdst
            return 2;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_MIN_U16

// Presumed 16-bit signed minimum (per mnemonic). Operands: two 2-byte
// sources, one 2-byte vdst.
class Inst_VOP3__V_MIN_I16 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_MIN_I16(InFmt_VOP3*);
    ~Inst_VOP3__V_MIN_I16();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 2; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src_0
            return 2;
          case 1: //src_1
            return 2;
          case 2: //vdst
            return 2;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_MIN_I16

// Presumed half-precision ldexp (per mnemonic — mantissa scaled by a
// power of two). Operands: two 2-byte sources, one 2-byte vdst.
class Inst_VOP3__V_LDEXP_F16 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_LDEXP_F16(InFmt_VOP3*);
    ~Inst_VOP3__V_LDEXP_F16();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 2; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src_0
            return 2;
          case 1: //src_1
            return 2;
          case 2: //vdst
            return 2;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_LDEXP_F16
// VOP3-encoded no-op and single-source instructions (moves, converts,
// rounding, transcendentals). Each class exposes only decode metadata;
// execute() is declared here and defined out of line.

// No-op: zero source and zero destination register operands, so every
// operand index falls through to the fatal() default.
class Inst_VOP3__V_NOP : public Inst_VOP3
{
  public:
    Inst_VOP3__V_NOP(InFmt_VOP3*);
    ~Inst_VOP3__V_NOP();

    // Total operand count = destination + source register operands
    // (0 + 0 here).
    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 0; }
    int numSrcRegOperands() override { return 0; }

    // No valid operand indices; the return -1 after fatal() is
    // unreachable and only placates the compiler.
    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_NOP

// Presumed 32-bit move (per mnemonic). Operands: 4-byte src, 4-byte
// vdst.
class Inst_VOP3__V_MOV_B32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_MOV_B32(InFmt_VOP3*);
    ~Inst_VOP3__V_MOV_B32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_MOV_B32

// Presumed F64 -> I32 convert (per mnemonic). Operands: 8-byte src,
// 4-byte vdst.
class Inst_VOP3__V_CVT_I32_F64 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_I32_F64();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 8;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_I32_F64

// Presumed I32 -> F64 convert (per mnemonic). Operands: 4-byte src,
// 8-byte vdst.
class Inst_VOP3__V_CVT_F64_I32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_F64_I32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 8;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_F64_I32

// Presumed I32 -> F32 convert (per mnemonic). Operands: 4-byte src,
// 4-byte vdst.
class Inst_VOP3__V_CVT_F32_I32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_F32_I32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_F32_I32

// Presumed U32 -> F32 convert (per mnemonic). Operands: 4-byte src,
// 4-byte vdst.
class Inst_VOP3__V_CVT_F32_U32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_F32_U32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_F32_U32

// Presumed F32 -> U32 convert (per mnemonic). Operands: 4-byte src,
// 4-byte vdst.
class Inst_VOP3__V_CVT_U32_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_U32_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_U32_F32

// Presumed F32 -> I32 convert (per mnemonic). Operands: 4-byte src,
// 4-byte vdst.
class Inst_VOP3__V_CVT_I32_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_I32_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_I32_F32

// Presumed 32-bit move with FED (per mnemonic — TODO confirm semantics
// in the ISA manual). Operands: 4-byte src, 4-byte vdst.
class Inst_VOP3__V_MOV_FED_B32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3*);
    ~Inst_VOP3__V_MOV_FED_B32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_MOV_FED_B32

// Presumed F32 -> F16 convert (per mnemonic). Operands: 4-byte src,
// 2-byte vdst.
class Inst_VOP3__V_CVT_F16_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_F16_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 2;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_F16_F32

// Presumed F16 -> F32 convert (per mnemonic). Operands: 2-byte src,
// 4-byte vdst.
class Inst_VOP3__V_CVT_F32_F16 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_F32_F16();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 2;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_F32_F16

// Presumed round-towards-plus-infinity F32 -> I32 convert (per
// mnemonic). Operands: 4-byte src, 4-byte vdst.
class Inst_VOP3__V_CVT_RPI_I32_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_RPI_I32_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_RPI_I32_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_RPI_I32_F32

// Presumed floor F32 -> I32 convert (per mnemonic). Operands: 4-byte
// src, 4-byte vdst.
class Inst_VOP3__V_CVT_FLR_I32_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_FLR_I32_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_FLR_I32_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_FLR_I32_F32

// Presumed 4-bit signed -> offset F32 convert (per mnemonic — TODO
// confirm in ISA manual). Operands: 4-byte src, 4-byte vdst.
class Inst_VOP3__V_CVT_OFF_F32_I4 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_OFF_F32_I4();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_OFF_F32_I4

// Presumed F64 -> F32 convert (per mnemonic). Operands: 8-byte src,
// 4-byte vdst.
class Inst_VOP3__V_CVT_F32_F64 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_F32_F64();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 8;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_F32_F64

// Presumed F32 -> F64 convert (per mnemonic). Operands: 4-byte src,
// 8-byte vdst.
class Inst_VOP3__V_CVT_F64_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_F64_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 8;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_F64_F32

// Presumed unsigned byte 0 -> F32 convert (per mnemonic). Operands:
// 4-byte src, 4-byte vdst.
class Inst_VOP3__V_CVT_F32_UBYTE0 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_F32_UBYTE0();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_F32_UBYTE0

// Presumed unsigned byte 1 -> F32 convert (per mnemonic). Operands:
// 4-byte src, 4-byte vdst.
class Inst_VOP3__V_CVT_F32_UBYTE1 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_F32_UBYTE1();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_F32_UBYTE1

// Presumed unsigned byte 2 -> F32 convert (per mnemonic). Operands:
// 4-byte src, 4-byte vdst.
class Inst_VOP3__V_CVT_F32_UBYTE2 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_F32_UBYTE2();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_F32_UBYTE2

// Presumed unsigned byte 3 -> F32 convert (per mnemonic). Operands:
// 4-byte src, 4-byte vdst.
class Inst_VOP3__V_CVT_F32_UBYTE3 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_F32_UBYTE3();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_F32_UBYTE3

// Presumed F64 -> U32 convert (per mnemonic). Operands: 8-byte src,
// 4-byte vdst.
class Inst_VOP3__V_CVT_U32_F64 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_U32_F64();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 8;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_U32_F64

// Presumed U32 -> F64 convert (per mnemonic). Operands: 4-byte src,
// 8-byte vdst.
class Inst_VOP3__V_CVT_F64_U32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3*);
    ~Inst_VOP3__V_CVT_F64_U32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 8;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CVT_F64_U32

// Presumed F64 truncate (per mnemonic). Operands: 8-byte src, 8-byte
// vdst.
class Inst_VOP3__V_TRUNC_F64 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_TRUNC_F64(InFmt_VOP3*);
    ~Inst_VOP3__V_TRUNC_F64();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 8;
          case 1: //vdst
            return 8;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_TRUNC_F64

// Presumed F64 ceiling (per mnemonic). Operands: 8-byte src, 8-byte
// vdst.
class Inst_VOP3__V_CEIL_F64 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CEIL_F64(InFmt_VOP3*);
    ~Inst_VOP3__V_CEIL_F64();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 8;
          case 1: //vdst
            return 8;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CEIL_F64

// Presumed F64 round-to-nearest-even (per mnemonic). Operands: 8-byte
// src, 8-byte vdst.
class Inst_VOP3__V_RNDNE_F64 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_RNDNE_F64(InFmt_VOP3*);
    ~Inst_VOP3__V_RNDNE_F64();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 8;
          case 1: //vdst
            return 8;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_RNDNE_F64

// Presumed F64 floor (per mnemonic). Operands: 8-byte src, 8-byte
// vdst.
class Inst_VOP3__V_FLOOR_F64 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_FLOOR_F64(InFmt_VOP3*);
    ~Inst_VOP3__V_FLOOR_F64();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 8;
          case 1: //vdst
            return 8;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_FLOOR_F64

// Presumed F32 fractional part (per mnemonic). Operands: 4-byte src,
// 4-byte vdst.
class Inst_VOP3__V_FRACT_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_FRACT_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_FRACT_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_FRACT_F32

// Presumed F32 truncate (per mnemonic). Operands: 4-byte src, 4-byte
// vdst.
class Inst_VOP3__V_TRUNC_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_TRUNC_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_TRUNC_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_TRUNC_F32

// Presumed F32 ceiling (per mnemonic). Operands: 4-byte src, 4-byte
// vdst.
class Inst_VOP3__V_CEIL_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_CEIL_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_CEIL_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_CEIL_F32

// Presumed F32 round-to-nearest-even (per mnemonic). Operands: 4-byte
// src, 4-byte vdst.
class Inst_VOP3__V_RNDNE_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_RNDNE_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_RNDNE_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_RNDNE_F32

// Presumed F32 floor (per mnemonic). Operands: 4-byte src, 4-byte
// vdst.
class Inst_VOP3__V_FLOOR_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_FLOOR_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_FLOOR_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_FLOOR_F32

// Presumed F32 base-2 exponential (per mnemonic). Operands: 4-byte
// src, 4-byte vdst.
class Inst_VOP3__V_EXP_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_EXP_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_EXP_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_EXP_F32

// Presumed F32 base-2 logarithm (per mnemonic). Operands: 4-byte src,
// 4-byte vdst.
class Inst_VOP3__V_LOG_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_LOG_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_LOG_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_LOG_F32

// Presumed F32 reciprocal (per mnemonic). Operands: 4-byte src,
// 4-byte vdst.
class Inst_VOP3__V_RCP_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_RCP_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_RCP_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_RCP_F32

// Presumed F32 reciprocal, IEEE flag variant (per mnemonic — TODO
// confirm in ISA manual). Operands: 4-byte src, 4-byte vdst.
class Inst_VOP3__V_RCP_IFLAG_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_RCP_IFLAG_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_RCP_IFLAG_F32

// Presumed F32 reciprocal square root (per mnemonic). Operands:
// 4-byte src, 4-byte vdst.
class Inst_VOP3__V_RSQ_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_RSQ_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_RSQ_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_RSQ_F32

// Presumed F64 reciprocal (per mnemonic). Operands: 8-byte src,
// 8-byte vdst.
class Inst_VOP3__V_RCP_F64 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_RCP_F64(InFmt_VOP3*);
    ~Inst_VOP3__V_RCP_F64();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 8;
          case 1: //vdst
            return 8;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_RCP_F64

// Presumed F64 reciprocal square root (per mnemonic). Operands:
// 8-byte src, 8-byte vdst.
class Inst_VOP3__V_RSQ_F64 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_RSQ_F64(InFmt_VOP3*);
    ~Inst_VOP3__V_RSQ_F64();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 8;
          case 1: //vdst
            return 8;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_RSQ_F64

// Presumed F32 square root (per mnemonic). Operands: 4-byte src,
// 4-byte vdst.
class Inst_VOP3__V_SQRT_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_SQRT_F32(InFmt_VOP3*);
    ~Inst_VOP3__V_SQRT_F32();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 4;
          case 1: //vdst
            return 4;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_SQRT_F32

// Presumed F64 square root (per mnemonic). Operands: 8-byte src,
// 8-byte vdst.
class Inst_VOP3__V_SQRT_F64 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_SQRT_F64(InFmt_VOP3*);
    ~Inst_VOP3__V_SQRT_F64();

    int
    getNumOperands() override
    {
        return numDstRegOperands() + numSrcRegOperands();
    } // getNumOperands

    int numDstRegOperands() override { return 1; }
    int numSrcRegOperands() override { return 1; }

    int
    getOperandSize(int opIdx) override
    {
        switch (opIdx) {
          case 0: //src
            return 8;
          case 1: //vdst
            return 8;
          default:
            fatal("op idx %i out of bounds\n", opIdx);
            return -1;
        }
    } // getOperandSize

    void execute(GPUDynInstPtr) override;
}; // Inst_VOP3__V_SQRT_F64

// Presumed F32 sine (per mnemonic); declaration continues below.
class Inst_VOP3__V_SIN_F32 : public Inst_VOP3
{
  public:
    Inst_VOP3__V_SIN_F32(InFmt_VOP3*);
~Inst_VOP3__V_SIN_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SIN_F32 - - class Inst_VOP3__V_COS_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_COS_F32(InFmt_VOP3*); - ~Inst_VOP3__V_COS_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_COS_F32 - - class Inst_VOP3__V_NOT_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_NOT_B32(InFmt_VOP3*); - ~Inst_VOP3__V_NOT_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_NOT_B32 - - class Inst_VOP3__V_BFREV_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_BFREV_B32(InFmt_VOP3*); - ~Inst_VOP3__V_BFREV_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() 
+ numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_BFREV_B32 - - class Inst_VOP3__V_FFBH_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_FFBH_U32(InFmt_VOP3*); - ~Inst_VOP3__V_FFBH_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_FFBH_U32 - - class Inst_VOP3__V_FFBL_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_FFBL_B32(InFmt_VOP3*); - ~Inst_VOP3__V_FFBL_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_FFBL_B32 - - class Inst_VOP3__V_FFBH_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_FFBH_I32(InFmt_VOP3*); - ~Inst_VOP3__V_FFBH_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { 
return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_FFBH_I32 - - class Inst_VOP3__V_FREXP_EXP_I32_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_FREXP_EXP_I32_F64(InFmt_VOP3*); - ~Inst_VOP3__V_FREXP_EXP_I32_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_FREXP_EXP_I32_F64 - - class Inst_VOP3__V_FREXP_MANT_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3*); - ~Inst_VOP3__V_FREXP_MANT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_FREXP_MANT_F64 - - class Inst_VOP3__V_FRACT_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_FRACT_F64(InFmt_VOP3*); - ~Inst_VOP3__V_FRACT_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int 
numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 8; - case 1: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_FRACT_F64 - - class Inst_VOP3__V_FREXP_EXP_I32_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_FREXP_EXP_I32_F32(InFmt_VOP3*); - ~Inst_VOP3__V_FREXP_EXP_I32_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_FREXP_EXP_I32_F32 - - class Inst_VOP3__V_FREXP_MANT_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3*); - ~Inst_VOP3__V_FREXP_MANT_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_FREXP_MANT_F32 - - class Inst_VOP3__V_CLREXCP : public Inst_VOP3 - { - public: - Inst_VOP3__V_CLREXCP(InFmt_VOP3*); - ~Inst_VOP3__V_CLREXCP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { 
return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CLREXCP - - class Inst_VOP3__V_CVT_F16_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3*); - ~Inst_VOP3__V_CVT_F16_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CVT_F16_U16 - - class Inst_VOP3__V_CVT_F16_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3*); - ~Inst_VOP3__V_CVT_F16_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CVT_F16_I16 - - class Inst_VOP3__V_CVT_U16_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CVT_U16_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - 
case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CVT_U16_F16 - - class Inst_VOP3__V_CVT_I16_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CVT_I16_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CVT_I16_F16 - - class Inst_VOP3__V_RCP_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_RCP_F16(InFmt_VOP3*); - ~Inst_VOP3__V_RCP_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_RCP_F16 - - class Inst_VOP3__V_SQRT_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_SQRT_F16(InFmt_VOP3*); - ~Inst_VOP3__V_SQRT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", 
opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SQRT_F16 - - class Inst_VOP3__V_RSQ_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_RSQ_F16(InFmt_VOP3*); - ~Inst_VOP3__V_RSQ_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_RSQ_F16 - - class Inst_VOP3__V_LOG_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_LOG_F16(InFmt_VOP3*); - ~Inst_VOP3__V_LOG_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_LOG_F16 - - class Inst_VOP3__V_EXP_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_EXP_F16(InFmt_VOP3*); - ~Inst_VOP3__V_EXP_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // 
Inst_VOP3__V_EXP_F16 - - class Inst_VOP3__V_FREXP_MANT_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3*); - ~Inst_VOP3__V_FREXP_MANT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_FREXP_MANT_F16 - - class Inst_VOP3__V_FREXP_EXP_I16_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_FREXP_EXP_I16_F16(InFmt_VOP3*); - ~Inst_VOP3__V_FREXP_EXP_I16_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_FREXP_EXP_I16_F16 - - class Inst_VOP3__V_FLOOR_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_FLOOR_F16(InFmt_VOP3*); - ~Inst_VOP3__V_FLOOR_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_FLOOR_F16 - - 
class Inst_VOP3__V_CEIL_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CEIL_F16(InFmt_VOP3*); - ~Inst_VOP3__V_CEIL_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CEIL_F16 - - class Inst_VOP3__V_TRUNC_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_TRUNC_F16(InFmt_VOP3*); - ~Inst_VOP3__V_TRUNC_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_TRUNC_F16 - - class Inst_VOP3__V_RNDNE_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_RNDNE_F16(InFmt_VOP3*); - ~Inst_VOP3__V_RNDNE_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_RNDNE_F16 - - class Inst_VOP3__V_FRACT_F16 : public Inst_VOP3 - { - public: - 
Inst_VOP3__V_FRACT_F16(InFmt_VOP3*); - ~Inst_VOP3__V_FRACT_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_FRACT_F16 - - class Inst_VOP3__V_SIN_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_SIN_F16(InFmt_VOP3*); - ~Inst_VOP3__V_SIN_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SIN_F16 - - class Inst_VOP3__V_COS_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_COS_F16(InFmt_VOP3*); - ~Inst_VOP3__V_COS_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 2; - case 1: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_COS_F16 - - class Inst_VOP3__V_EXP_LEGACY_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3*); - ~Inst_VOP3__V_EXP_LEGACY_F32(); - - int - 
getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_EXP_LEGACY_F32 - - class Inst_VOP3__V_LOG_LEGACY_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3*); - ~Inst_VOP3__V_LOG_LEGACY_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src - return 4; - case 1: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_LOG_LEGACY_F32 - - class Inst_VOP3__V_MAD_LEGACY_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3*); - ~Inst_VOP3__V_MAD_LEGACY_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAD_LEGACY_F32 - - class Inst_VOP3__V_MAD_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAD_F32(InFmt_VOP3*); - 
~Inst_VOP3__V_MAD_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAD_F32 - - class Inst_VOP3__V_MAD_I32_I24 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3*); - ~Inst_VOP3__V_MAD_I32_I24(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAD_I32_I24 - - class Inst_VOP3__V_MAD_U32_U24 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3*); - ~Inst_VOP3__V_MAD_U32_U24(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // 
Inst_VOP3__V_MAD_U32_U24 - - class Inst_VOP3__V_CUBEID_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CUBEID_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CUBEID_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CUBEID_F32 - - class Inst_VOP3__V_CUBESC_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CUBESC_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CUBESC_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CUBESC_F32 - - class Inst_VOP3__V_CUBETC_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CUBETC_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CUBETC_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op 
idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CUBETC_F32 - - class Inst_VOP3__V_CUBEMA_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CUBEMA_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CUBEMA_F32 - - class Inst_VOP3__V_BFE_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_BFE_U32(InFmt_VOP3*); - ~Inst_VOP3__V_BFE_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_BFE_U32 - - class Inst_VOP3__V_BFE_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_BFE_I32(InFmt_VOP3*); - ~Inst_VOP3__V_BFE_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: 
//src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_BFE_I32 - - class Inst_VOP3__V_BFI_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_BFI_B32(InFmt_VOP3*); - ~Inst_VOP3__V_BFI_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_BFI_B32 - - class Inst_VOP3__V_FMA_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_FMA_F32(InFmt_VOP3*); - ~Inst_VOP3__V_FMA_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_FMA_F32 - - class Inst_VOP3__V_FMA_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_FMA_F64(InFmt_VOP3*); - ~Inst_VOP3__V_FMA_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - 
getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //src_2 - return 8; - case 3: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_FMA_F64 - - class Inst_VOP3__V_LERP_U8 : public Inst_VOP3 - { - public: - Inst_VOP3__V_LERP_U8(InFmt_VOP3*); - ~Inst_VOP3__V_LERP_U8(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_LERP_U8 - - class Inst_VOP3__V_ALIGNBIT_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3*); - ~Inst_VOP3__V_ALIGNBIT_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_ALIGNBIT_B32 - - class Inst_VOP3__V_ALIGNBYTE_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3*); - ~Inst_VOP3__V_ALIGNBYTE_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // 
getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_ALIGNBYTE_B32 - - class Inst_VOP3__V_MIN3_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MIN3_F32(InFmt_VOP3*); - ~Inst_VOP3__V_MIN3_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MIN3_F32 - - class Inst_VOP3__V_MIN3_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MIN3_I32(InFmt_VOP3*); - ~Inst_VOP3__V_MIN3_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MIN3_I32 - - class Inst_VOP3__V_MIN3_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MIN3_U32(InFmt_VOP3*); - 
~Inst_VOP3__V_MIN3_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MIN3_U32 - - class Inst_VOP3__V_MAX3_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAX3_F32(InFmt_VOP3*); - ~Inst_VOP3__V_MAX3_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAX3_F32 - - class Inst_VOP3__V_MAX3_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAX3_I32(InFmt_VOP3*); - ~Inst_VOP3__V_MAX3_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAX3_I32 - - 
class Inst_VOP3__V_MAX3_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAX3_U32(InFmt_VOP3*); - ~Inst_VOP3__V_MAX3_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAX3_U32 - - class Inst_VOP3__V_MED3_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MED3_F32(InFmt_VOP3*); - ~Inst_VOP3__V_MED3_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MED3_F32 - - class Inst_VOP3__V_MED3_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MED3_I32(InFmt_VOP3*); - ~Inst_VOP3__V_MED3_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } 
- } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MED3_I32 - - class Inst_VOP3__V_MED3_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MED3_U32(InFmt_VOP3*); - ~Inst_VOP3__V_MED3_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MED3_U32 - - class Inst_VOP3__V_SAD_U8 : public Inst_VOP3 - { - public: - Inst_VOP3__V_SAD_U8(InFmt_VOP3*); - ~Inst_VOP3__V_SAD_U8(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SAD_U8 - - class Inst_VOP3__V_SAD_HI_U8 : public Inst_VOP3 - { - public: - Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3*); - ~Inst_VOP3__V_SAD_HI_U8(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: 
//vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SAD_HI_U8 - - class Inst_VOP3__V_SAD_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_SAD_U16(InFmt_VOP3*); - ~Inst_VOP3__V_SAD_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SAD_U16 - - class Inst_VOP3__V_SAD_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_SAD_U32(InFmt_VOP3*); - ~Inst_VOP3__V_SAD_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_SAD_U32 - - class Inst_VOP3__V_CVT_PK_U8_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CVT_PK_U8_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch 
(opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CVT_PK_U8_F32 - - class Inst_VOP3__V_DIV_FIXUP_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3*); - ~Inst_VOP3__V_DIV_FIXUP_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_DIV_FIXUP_F32 - - class Inst_VOP3__V_DIV_FIXUP_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3*); - ~Inst_VOP3__V_DIV_FIXUP_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //src_2 - return 8; - case 3: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_DIV_FIXUP_F64 - - class Inst_VOP3__V_DIV_SCALE_F32 : public Inst_VOP3_SDST_ENC - { - public: - Inst_VOP3__V_DIV_SCALE_F32(InFmt_VOP3_SDST_ENC*); - ~Inst_VOP3__V_DIV_SCALE_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } 
// getNumOperands - - int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - case 4: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_DIV_SCALE_F32 - - class Inst_VOP3__V_DIV_SCALE_F64 : public Inst_VOP3_SDST_ENC - { - public: - Inst_VOP3__V_DIV_SCALE_F64(InFmt_VOP3_SDST_ENC*); - ~Inst_VOP3__V_DIV_SCALE_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //src_2 - return 8; - case 3: //vdst - return 8; - case 4: //vcc - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_DIV_SCALE_F64 - - class Inst_VOP3__V_DIV_FMAS_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3*); - ~Inst_VOP3__V_DIV_FMAS_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: - return 8; - case 4: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // 
Inst_VOP3__V_DIV_FMAS_F32 - - class Inst_VOP3__V_DIV_FMAS_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3*); - ~Inst_VOP3__V_DIV_FMAS_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //src_2 - return 8; - case 3: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_DIV_FMAS_F64 - - class Inst_VOP3__V_MSAD_U8 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MSAD_U8(InFmt_VOP3*); - ~Inst_VOP3__V_MSAD_U8(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MSAD_U8 - - class Inst_VOP3__V_QSAD_PK_U16_U8 : public Inst_VOP3 - { - public: - Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3*); - ~Inst_VOP3__V_QSAD_PK_U16_U8(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 4; - case 2: //src_2 - return 8; - case 3: //vdst - return 8; - default: - 
fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_QSAD_PK_U16_U8 - - class Inst_VOP3__V_MQSAD_PK_U16_U8 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MQSAD_PK_U16_U8(InFmt_VOP3*); - ~Inst_VOP3__V_MQSAD_PK_U16_U8(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 4; - case 2: //src_2 - return 8; - case 3: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MQSAD_PK_U16_U8 - - class Inst_VOP3__V_MQSAD_U32_U8 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3*); - ~Inst_VOP3__V_MQSAD_U32_U8(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 4; - case 2: //src_2 - return 16; - case 3: //vdst - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MQSAD_U32_U8 - - class Inst_VOP3__V_MAD_U64_U32 : public Inst_VOP3_SDST_ENC - { - public: - Inst_VOP3__V_MAD_U64_U32(InFmt_VOP3_SDST_ENC*); - ~Inst_VOP3__V_MAD_U64_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 3; } - - int - 
getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - case 4: //carryout - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAD_U64_U32 - - class Inst_VOP3__V_MAD_I64_I32 : public Inst_VOP3_SDST_ENC - { - public: - Inst_VOP3__V_MAD_I64_I32(InFmt_VOP3_SDST_ENC*); - ~Inst_VOP3__V_MAD_I64_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 2; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - case 4: //carryout - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAD_I64_I32 - - class Inst_VOP3__V_MAD_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAD_F16(InFmt_VOP3*); - ~Inst_VOP3__V_MAD_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //src_2 - return 2; - case 3: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAD_F16 - - class Inst_VOP3__V_MAD_U16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAD_U16(InFmt_VOP3*); - ~Inst_VOP3__V_MAD_U16(); - - int - getNumOperands() 
override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //src_2 - return 2; - case 3: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAD_U16 - - class Inst_VOP3__V_MAD_I16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAD_I16(InFmt_VOP3*); - ~Inst_VOP3__V_MAD_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //src_2 - return 2; - case 3: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAD_I16 - - class Inst_VOP3__V_PERM_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_PERM_B32(InFmt_VOP3*); - ~Inst_VOP3__V_PERM_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //src_2 - return 4; - case 3: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - // From the GCN3 ISA SPEC: - // byte permute(byte in[8], byte sel) { - // if (sel>=13) then return 0xff; - // elsif (sel==12) then 
return 0x00; - // elsif (sel==11) then return in[7][7] * 0xff; - // elsif (sel==10) then return in[5][7] * 0xff; - // elsif (sel==9) then return in[3][7] * 0xff; - // elsif (sel==8) then return in[1][7] * 0xff; - // else return in[sel]; - // } - // NOTE: I interpret the in[x][7] notation to mean "the high order - // bit of byte x". - uint8_t - permute(uint64_t in_dword2x, uint32_t sel) - { - assert (sel < 256); - uint8_t *in = reinterpret_cast(&in_dword2x); - DPRINTF(GCN3, "in_dword2x = 0x%08x\n", in_dword2x); - DPRINTF(GCN3, "Selecting %x using index %d\n", in[sel], sel); - if (sel >= 13) return 0xFF; - else if (sel == 12) return 0; - else if (sel == 11) return (in[7] & 0x80) ? 0xFF : 0; - else if (sel == 10) return (in[5] & 0x80) ? 0xFF : 0; - else if (sel == 9) return (in[3] & 0x80) ? 0xFF : 0; - else if (sel == 8) return (in[1] & 0x80) ? 0xFF : 0; - else return in[sel]; - } - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_PERM_B32 - - class Inst_VOP3__V_FMA_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_FMA_F16(InFmt_VOP3*); - ~Inst_VOP3__V_FMA_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //src_2 - return 2; - case 3: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_FMA_F16 - - class Inst_VOP3__V_DIV_FIXUP_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3*); - ~Inst_VOP3__V_DIV_FIXUP_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() 
override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 2; - case 1: //src_1 - return 2; - case 2: //src_2 - return 2; - case 3: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_DIV_FIXUP_F16 - - class Inst_VOP3__V_CVT_PKACCUM_U8_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CVT_PKACCUM_U8_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CVT_PKACCUM_U8_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CVT_PKACCUM_U8_F32 - - class Inst_VOP3__V_INTERP_P1_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3*); - ~Inst_VOP3__V_INTERP_P1_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_ij - return 4; - case 1: //attr - return 32; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_INTERP_P1_F32 - - class Inst_VOP3__V_INTERP_P2_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3*); - ~Inst_VOP3__V_INTERP_P2_F32(); - - int - getNumOperands() override - { - return 
numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_ij - return 4; - case 1: //attr - return 32; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_INTERP_P2_F32 - - class Inst_VOP3__V_INTERP_MOV_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3*); - ~Inst_VOP3__V_INTERP_MOV_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //param - return 4; - case 1: //attr - return 32; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_INTERP_MOV_F32 - - class Inst_VOP3__V_INTERP_P1LL_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_INTERP_P1LL_F16(InFmt_VOP3*); - ~Inst_VOP3__V_INTERP_P1LL_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_ij - return 4; - case 1: //attr - return 2; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_INTERP_P1LL_F16 - - class Inst_VOP3__V_INTERP_P1LV_F16 : public Inst_VOP3 - { - public: - 
Inst_VOP3__V_INTERP_P1LV_F16(InFmt_VOP3*); - ~Inst_VOP3__V_INTERP_P1LV_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_ij - return 4; - case 1: //attr - return 2; - case 2: //vgpr_add - return 2; - case 3: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_INTERP_P1LV_F16 - - class Inst_VOP3__V_INTERP_P2_F16 : public Inst_VOP3 - { - public: - Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3*); - ~Inst_VOP3__V_INTERP_P2_F16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_ij - return 4; - case 1: //attr - return 2; - case 2: //vgpr_add - return 4; - case 3: //vgpr_dst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_INTERP_P2_F16 - - class Inst_VOP3__V_ADD_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_ADD_F64(InFmt_VOP3*); - ~Inst_VOP3__V_ADD_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void 
execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_ADD_F64 - - class Inst_VOP3__V_MUL_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MUL_F64(InFmt_VOP3*); - ~Inst_VOP3__V_MUL_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MUL_F64 - - class Inst_VOP3__V_MIN_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MIN_F64(InFmt_VOP3*); - ~Inst_VOP3__V_MIN_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MIN_F64 - - class Inst_VOP3__V_MAX_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MAX_F64(InFmt_VOP3*); - ~Inst_VOP3__V_MAX_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 8; - case 2: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void 
execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MAX_F64 - - class Inst_VOP3__V_LDEXP_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_LDEXP_F64(InFmt_VOP3*); - ~Inst_VOP3__V_LDEXP_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 4; - case 2: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_LDEXP_F64 - - class Inst_VOP3__V_MUL_LO_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3*); - ~Inst_VOP3__V_MUL_LO_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MUL_LO_U32 - - class Inst_VOP3__V_MUL_HI_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3*); - ~Inst_VOP3__V_MUL_HI_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // 
getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MUL_HI_U32 - - class Inst_VOP3__V_MUL_HI_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3*); - ~Inst_VOP3__V_MUL_HI_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MUL_HI_I32 - - class Inst_VOP3__V_LDEXP_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_LDEXP_F32(InFmt_VOP3*); - ~Inst_VOP3__V_LDEXP_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_LDEXP_F32 - - class Inst_VOP3__V_READLANE_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_READLANE_B32(InFmt_VOP3*); - ~Inst_VOP3__V_READLANE_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vsrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //sdst - return 4; - default: - fatal("op idx %i out of 
bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_READLANE_B32 - - class Inst_VOP3__V_WRITELANE_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3*); - ~Inst_VOP3__V_WRITELANE_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //ssrc_0 - return 4; - case 1: //ssrc_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_WRITELANE_B32 - - class Inst_VOP3__V_BCNT_U32_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3*); - ~Inst_VOP3__V_BCNT_U32_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_BCNT_U32_B32 - - class Inst_VOP3__V_MBCNT_LO_U32_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MBCNT_LO_U32_B32(InFmt_VOP3*); - ~Inst_VOP3__V_MBCNT_LO_U32_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - 
return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MBCNT_LO_U32_B32 - - class Inst_VOP3__V_MBCNT_HI_U32_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_MBCNT_HI_U32_B32(InFmt_VOP3*); - ~Inst_VOP3__V_MBCNT_HI_U32_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_MBCNT_HI_U32_B32 - - class Inst_VOP3__V_LSHLREV_B64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3*); - ~Inst_VOP3__V_LSHLREV_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 8; - case 2: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_LSHLREV_B64 - - class Inst_VOP3__V_LSHRREV_B64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3*); - ~Inst_VOP3__V_LSHRREV_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { 
- switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 8; - case 2: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_LSHRREV_B64 - - class Inst_VOP3__V_ASHRREV_I64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3*); - ~Inst_VOP3__V_ASHRREV_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 8; - case 2: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_ASHRREV_I64 - - class Inst_VOP3__V_TRIG_PREOP_F64 : public Inst_VOP3 - { - public: - Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3*); - ~Inst_VOP3__V_TRIG_PREOP_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 8; - case 1: //src_1 - return 4; - case 2: //vdst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_TRIG_PREOP_F64 - - class Inst_VOP3__V_BFM_B32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_BFM_B32(InFmt_VOP3*); - ~Inst_VOP3__V_BFM_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - 
int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_BFM_B32 - - class Inst_VOP3__V_CVT_PKNORM_I16_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CVT_PKNORM_I16_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CVT_PKNORM_I16_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CVT_PKNORM_I16_F32 - - class Inst_VOP3__V_CVT_PKNORM_U16_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CVT_PKNORM_U16_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CVT_PKNORM_U16_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CVT_PKNORM_U16_F32 - - class Inst_VOP3__V_CVT_PKRTZ_F16_F32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CVT_PKRTZ_F16_F32(InFmt_VOP3*); - ~Inst_VOP3__V_CVT_PKRTZ_F16_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // 
getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CVT_PKRTZ_F16_F32 - - class Inst_VOP3__V_CVT_PK_U16_U32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3*); - ~Inst_VOP3__V_CVT_PK_U16_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CVT_PK_U16_U32 - - class Inst_VOP3__V_CVT_PK_I16_I32 : public Inst_VOP3 - { - public: - Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3*); - ~Inst_VOP3__V_CVT_PK_I16_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //src_0 - return 4; - case 1: //src_1 - return 4; - case 2: //vdst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_VOP3__V_CVT_PK_I16_I32 - - class Inst_DS__DS_ADD_U32 : public Inst_DS - { - public: - Inst_DS__DS_ADD_U32(InFmt_DS*); - ~Inst_DS__DS_ADD_U32(); - - int - getNumOperands() override - { - 
return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_ADD_U32 - - class Inst_DS__DS_SUB_U32 : public Inst_DS - { - public: - Inst_DS__DS_SUB_U32(InFmt_DS*); - ~Inst_DS__DS_SUB_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_SUB_U32 - - class Inst_DS__DS_RSUB_U32 : public Inst_DS - { - public: - Inst_DS__DS_RSUB_U32(InFmt_DS*); - ~Inst_DS__DS_RSUB_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_RSUB_U32 - - class Inst_DS__DS_INC_U32 : public Inst_DS - { - public: - Inst_DS__DS_INC_U32(InFmt_DS*); - ~Inst_DS__DS_INC_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() 
override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_INC_U32 - - class Inst_DS__DS_DEC_U32 : public Inst_DS - { - public: - Inst_DS__DS_DEC_U32(InFmt_DS*); - ~Inst_DS__DS_DEC_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_DEC_U32 - - class Inst_DS__DS_MIN_I32 : public Inst_DS - { - public: - Inst_DS__DS_MIN_I32(InFmt_DS*); - ~Inst_DS__DS_MIN_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_I32 - - class Inst_DS__DS_MAX_I32 : public Inst_DS - { - public: - Inst_DS__DS_MAX_I32(InFmt_DS*); - ~Inst_DS__DS_MAX_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int 
opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_I32 - - class Inst_DS__DS_MIN_U32 : public Inst_DS - { - public: - Inst_DS__DS_MIN_U32(InFmt_DS*); - ~Inst_DS__DS_MIN_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_U32 - - class Inst_DS__DS_MAX_U32 : public Inst_DS - { - public: - Inst_DS__DS_MAX_U32(InFmt_DS*); - ~Inst_DS__DS_MAX_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_U32 - - class Inst_DS__DS_AND_B32 : public Inst_DS - { - public: - Inst_DS__DS_AND_B32(InFmt_DS*); - ~Inst_DS__DS_AND_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - 
default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_AND_B32 - - class Inst_DS__DS_OR_B32 : public Inst_DS - { - public: - Inst_DS__DS_OR_B32(InFmt_DS*); - ~Inst_DS__DS_OR_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_OR_B32 - - class Inst_DS__DS_XOR_B32 : public Inst_DS - { - public: - Inst_DS__DS_XOR_B32(InFmt_DS*); - ~Inst_DS__DS_XOR_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_XOR_B32 - - class Inst_DS__DS_MSKOR_B32 : public Inst_DS - { - public: - Inst_DS__DS_MSKOR_B32(InFmt_DS*); - ~Inst_DS__DS_MSKOR_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void 
execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MSKOR_B32 - - class Inst_DS__DS_WRITE_B32 : public Inst_DS - { - public: - Inst_DS__DS_WRITE_B32(InFmt_DS*); - ~Inst_DS__DS_WRITE_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRITE_B32 - - class Inst_DS__DS_WRITE2_B32 : public Inst_DS - { - public: - Inst_DS__DS_WRITE2_B32(InFmt_DS*); - ~Inst_DS__DS_WRITE2_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_d1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRITE2_B32 - - class Inst_DS__DS_WRITE2ST64_B32 : public Inst_DS - { - public: - Inst_DS__DS_WRITE2ST64_B32(InFmt_DS*); - ~Inst_DS__DS_WRITE2ST64_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - 
return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_d1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRITE2ST64_B32 - - class Inst_DS__DS_CMPST_B32 : public Inst_DS - { - public: - Inst_DS__DS_CMPST_B32(InFmt_DS*); - ~Inst_DS__DS_CMPST_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_CMPST_B32 - - class Inst_DS__DS_CMPST_F32 : public Inst_DS - { - public: - Inst_DS__DS_CMPST_F32(InFmt_DS*); - ~Inst_DS__DS_CMPST_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_CMPST_F32 - - class Inst_DS__DS_MIN_F32 : public Inst_DS - { - public: - Inst_DS__DS_MIN_F32(InFmt_DS*); - ~Inst_DS__DS_MIN_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - 
switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_F32 - - class Inst_DS__DS_MAX_F32 : public Inst_DS - { - public: - Inst_DS__DS_MAX_F32(InFmt_DS*); - ~Inst_DS__DS_MAX_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_F32 - - class Inst_DS__DS_NOP : public Inst_DS - { - public: - Inst_DS__DS_NOP(InFmt_DS*); - ~Inst_DS__DS_NOP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_NOP - - class Inst_DS__DS_ADD_F32 : public Inst_DS - { - public: - Inst_DS__DS_ADD_F32(InFmt_DS*); - ~Inst_DS__DS_ADD_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - 
} // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_ADD_F32 - - class Inst_DS__DS_WRITE_B8 : public Inst_DS - { - public: - Inst_DS__DS_WRITE_B8(InFmt_DS*); - ~Inst_DS__DS_WRITE_B8(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 1; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRITE_B8 - - class Inst_DS__DS_WRITE_B16 : public Inst_DS - { - public: - Inst_DS__DS_WRITE_B16(InFmt_DS*); - ~Inst_DS__DS_WRITE_B16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRITE_B16 - - class Inst_DS__DS_ADD_RTN_U32 : public Inst_DS - { - public: - Inst_DS__DS_ADD_RTN_U32(InFmt_DS*); - ~Inst_DS__DS_ADD_RTN_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: 
//vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_ADD_RTN_U32 - - class Inst_DS__DS_SUB_RTN_U32 : public Inst_DS - { - public: - Inst_DS__DS_SUB_RTN_U32(InFmt_DS*); - ~Inst_DS__DS_SUB_RTN_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_SUB_RTN_U32 - - class Inst_DS__DS_RSUB_RTN_U32 : public Inst_DS - { - public: - Inst_DS__DS_RSUB_RTN_U32(InFmt_DS*); - ~Inst_DS__DS_RSUB_RTN_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_RSUB_RTN_U32 - - class Inst_DS__DS_INC_RTN_U32 : public Inst_DS - { - public: - Inst_DS__DS_INC_RTN_U32(InFmt_DS*); - ~Inst_DS__DS_INC_RTN_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch 
(opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_INC_RTN_U32 - - class Inst_DS__DS_DEC_RTN_U32 : public Inst_DS - { - public: - Inst_DS__DS_DEC_RTN_U32(InFmt_DS*); - ~Inst_DS__DS_DEC_RTN_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_DEC_RTN_U32 - - class Inst_DS__DS_MIN_RTN_I32 : public Inst_DS - { - public: - Inst_DS__DS_MIN_RTN_I32(InFmt_DS*); - ~Inst_DS__DS_MIN_RTN_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_RTN_I32 - - class Inst_DS__DS_MAX_RTN_I32 : public Inst_DS - { - public: - Inst_DS__DS_MAX_RTN_I32(InFmt_DS*); - ~Inst_DS__DS_MAX_RTN_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - 
getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_RTN_I32 - - class Inst_DS__DS_MIN_RTN_U32 : public Inst_DS - { - public: - Inst_DS__DS_MIN_RTN_U32(InFmt_DS*); - ~Inst_DS__DS_MIN_RTN_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_RTN_U32 - - class Inst_DS__DS_MAX_RTN_U32 : public Inst_DS - { - public: - Inst_DS__DS_MAX_RTN_U32(InFmt_DS*); - ~Inst_DS__DS_MAX_RTN_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_RTN_U32 - - class Inst_DS__DS_AND_RTN_B32 : public Inst_DS - { - public: - Inst_DS__DS_AND_RTN_B32(InFmt_DS*); - ~Inst_DS__DS_AND_RTN_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int 
numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_AND_RTN_B32 - - class Inst_DS__DS_OR_RTN_B32 : public Inst_DS - { - public: - Inst_DS__DS_OR_RTN_B32(InFmt_DS*); - ~Inst_DS__DS_OR_RTN_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_OR_RTN_B32 - - class Inst_DS__DS_XOR_RTN_B32 : public Inst_DS - { - public: - Inst_DS__DS_XOR_RTN_B32(InFmt_DS*); - ~Inst_DS__DS_XOR_RTN_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_XOR_RTN_B32 - - class Inst_DS__DS_MSKOR_RTN_B32 : public Inst_DS - { - public: - Inst_DS__DS_MSKOR_RTN_B32(InFmt_DS*); - ~Inst_DS__DS_MSKOR_RTN_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int 
numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MSKOR_RTN_B32 - - class Inst_DS__DS_WRXCHG_RTN_B32 : public Inst_DS - { - public: - Inst_DS__DS_WRXCHG_RTN_B32(InFmt_DS*); - ~Inst_DS__DS_WRXCHG_RTN_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRXCHG_RTN_B32 - - class Inst_DS__DS_WRXCHG2_RTN_B32 : public Inst_DS - { - public: - Inst_DS__DS_WRXCHG2_RTN_B32(InFmt_DS*); - ~Inst_DS__DS_WRXCHG2_RTN_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRXCHG2_RTN_B32 - - class Inst_DS__DS_WRXCHG2ST64_RTN_B32 : public Inst_DS - { - public: - Inst_DS__DS_WRXCHG2ST64_RTN_B32(InFmt_DS*); - ~Inst_DS__DS_WRXCHG2ST64_RTN_B32(); - - int - getNumOperands() 
override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRXCHG2ST64_RTN_B32 - - class Inst_DS__DS_CMPST_RTN_B32 : public Inst_DS - { - public: - Inst_DS__DS_CMPST_RTN_B32(InFmt_DS*); - ~Inst_DS__DS_CMPST_RTN_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_CMPST_RTN_B32 - - class Inst_DS__DS_CMPST_RTN_F32 : public Inst_DS - { - public: - Inst_DS__DS_CMPST_RTN_F32(InFmt_DS*); - ~Inst_DS__DS_CMPST_RTN_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_CMPST_RTN_F32 - - class Inst_DS__DS_MIN_RTN_F32 : public Inst_DS - { - public: - 
Inst_DS__DS_MIN_RTN_F32(InFmt_DS*); - ~Inst_DS__DS_MIN_RTN_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_RTN_F32 - - class Inst_DS__DS_MAX_RTN_F32 : public Inst_DS - { - public: - Inst_DS__DS_MAX_RTN_F32(InFmt_DS*); - ~Inst_DS__DS_MAX_RTN_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_RTN_F32 - - class Inst_DS__DS_WRAP_RTN_B32 : public Inst_DS - { - public: - Inst_DS__DS_WRAP_RTN_B32(InFmt_DS*); - ~Inst_DS__DS_WRAP_RTN_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRAP_RTN_B32 - - class 
Inst_DS__DS_ADD_RTN_F32 : public Inst_DS - { - public: - Inst_DS__DS_ADD_RTN_F32(InFmt_DS*); - ~Inst_DS__DS_ADD_RTN_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_ADD_RTN_F32 - - class Inst_DS__DS_READ_B32 : public Inst_DS - { - public: - Inst_DS__DS_READ_B32(InFmt_DS*); - ~Inst_DS__DS_READ_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_READ_B32 - - class Inst_DS__DS_READ2_B32 : public Inst_DS - { - public: - Inst_DS__DS_READ2_B32(InFmt_DS*); - ~Inst_DS__DS_READ2_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) 
override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_READ2_B32 - - class Inst_DS__DS_READ2ST64_B32 : public Inst_DS - { - public: - Inst_DS__DS_READ2ST64_B32(InFmt_DS*); - ~Inst_DS__DS_READ2ST64_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_READ2ST64_B32 - - class Inst_DS__DS_READ_I8 : public Inst_DS - { - public: - Inst_DS__DS_READ_I8(InFmt_DS*); - ~Inst_DS__DS_READ_I8(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - return 1; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_READ_I8 - - class Inst_DS__DS_READ_U8 : public Inst_DS - { - public: - Inst_DS__DS_READ_U8(InFmt_DS*); - ~Inst_DS__DS_READ_U8(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - return 1; - default: - fatal("op idx 
%i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_READ_U8 - - class Inst_DS__DS_READ_I16 : public Inst_DS - { - public: - Inst_DS__DS_READ_I16(InFmt_DS*); - ~Inst_DS__DS_READ_I16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_READ_I16 - - class Inst_DS__DS_READ_U16 : public Inst_DS - { - public: - Inst_DS__DS_READ_U16(InFmt_DS*); - ~Inst_DS__DS_READ_U16(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_READ_U16 - - class Inst_DS__DS_SWIZZLE_B32 : public Inst_DS - { - public: - Inst_DS__DS_SWIZZLE_B32(InFmt_DS*); - ~Inst_DS__DS_SWIZZLE_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) 
{ - case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_SWIZZLE_B32 - - class Inst_DS__DS_PERMUTE_B32 : public Inst_DS - { - public: - Inst_DS__DS_PERMUTE_B32(InFmt_DS*); - ~Inst_DS__DS_PERMUTE_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_PERMUTE_B32 - - class Inst_DS__DS_BPERMUTE_B32 : public Inst_DS - { - public: - Inst_DS__DS_BPERMUTE_B32(InFmt_DS*); - ~Inst_DS__DS_BPERMUTE_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 4; - case 2: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_BPERMUTE_B32 - - class Inst_DS__DS_ADD_U64 : public Inst_DS - { - public: - Inst_DS__DS_ADD_U64(InFmt_DS*); - ~Inst_DS__DS_ADD_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) 
{ - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_ADD_U64 - - class Inst_DS__DS_SUB_U64 : public Inst_DS - { - public: - Inst_DS__DS_SUB_U64(InFmt_DS*); - ~Inst_DS__DS_SUB_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_SUB_U64 - - class Inst_DS__DS_RSUB_U64 : public Inst_DS - { - public: - Inst_DS__DS_RSUB_U64(InFmt_DS*); - ~Inst_DS__DS_RSUB_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_RSUB_U64 - - class Inst_DS__DS_INC_U64 : public Inst_DS - { - public: - Inst_DS__DS_INC_U64(InFmt_DS*); - ~Inst_DS__DS_INC_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - default: - fatal("op idx %i out of 
bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_INC_U64 - - class Inst_DS__DS_DEC_U64 : public Inst_DS - { - public: - Inst_DS__DS_DEC_U64(InFmt_DS*); - ~Inst_DS__DS_DEC_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_DEC_U64 - - class Inst_DS__DS_MIN_I64 : public Inst_DS - { - public: - Inst_DS__DS_MIN_I64(InFmt_DS*); - ~Inst_DS__DS_MIN_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_I64 - - class Inst_DS__DS_MAX_I64 : public Inst_DS - { - public: - Inst_DS__DS_MAX_I64(InFmt_DS*); - ~Inst_DS__DS_MAX_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; 
// Inst_DS__DS_MAX_I64 - - class Inst_DS__DS_MIN_U64 : public Inst_DS - { - public: - Inst_DS__DS_MIN_U64(InFmt_DS*); - ~Inst_DS__DS_MIN_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_U64 - - class Inst_DS__DS_MAX_U64 : public Inst_DS - { - public: - Inst_DS__DS_MAX_U64(InFmt_DS*); - ~Inst_DS__DS_MAX_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_U64 - - class Inst_DS__DS_AND_B64 : public Inst_DS - { - public: - Inst_DS__DS_AND_B64(InFmt_DS*); - ~Inst_DS__DS_AND_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_AND_B64 - - class Inst_DS__DS_OR_B64 : public Inst_DS - { - public: - 
Inst_DS__DS_OR_B64(InFmt_DS*); - ~Inst_DS__DS_OR_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_OR_B64 - - class Inst_DS__DS_XOR_B64 : public Inst_DS - { - public: - Inst_DS__DS_XOR_B64(InFmt_DS*); - ~Inst_DS__DS_XOR_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_XOR_B64 - - class Inst_DS__DS_MSKOR_B64 : public Inst_DS - { - public: - Inst_DS__DS_MSKOR_B64(InFmt_DS*); - ~Inst_DS__DS_MSKOR_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MSKOR_B64 - - class Inst_DS__DS_WRITE_B64 : public Inst_DS - { - public: - Inst_DS__DS_WRITE_B64(InFmt_DS*); - ~Inst_DS__DS_WRITE_B64(); - - int - getNumOperands() override - 
{ - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRITE_B64 - - class Inst_DS__DS_WRITE2_B64 : public Inst_DS - { - public: - Inst_DS__DS_WRITE2_B64(InFmt_DS*); - ~Inst_DS__DS_WRITE2_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_d1 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRITE2_B64 - - class Inst_DS__DS_WRITE2ST64_B64 : public Inst_DS - { - public: - Inst_DS__DS_WRITE2ST64_B64(InFmt_DS*); - ~Inst_DS__DS_WRITE2ST64_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRITE2ST64_B64 - - class 
Inst_DS__DS_CMPST_B64 : public Inst_DS - { - public: - Inst_DS__DS_CMPST_B64(InFmt_DS*); - ~Inst_DS__DS_CMPST_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_CMPST_B64 - - class Inst_DS__DS_CMPST_F64 : public Inst_DS - { - public: - Inst_DS__DS_CMPST_F64(InFmt_DS*); - ~Inst_DS__DS_CMPST_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_CMPST_F64 - - class Inst_DS__DS_MIN_F64 : public Inst_DS - { - public: - Inst_DS__DS_MIN_F64(InFmt_DS*); - ~Inst_DS__DS_MIN_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_F64 - - class Inst_DS__DS_MAX_F64 : public Inst_DS - { - public: - Inst_DS__DS_MAX_F64(InFmt_DS*); - 
~Inst_DS__DS_MAX_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_F64 - - class Inst_DS__DS_ADD_RTN_U64 : public Inst_DS - { - public: - Inst_DS__DS_ADD_RTN_U64(InFmt_DS*); - ~Inst_DS__DS_ADD_RTN_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_ADD_RTN_U64 - - class Inst_DS__DS_SUB_RTN_U64 : public Inst_DS - { - public: - Inst_DS__DS_SUB_RTN_U64(InFmt_DS*); - ~Inst_DS__DS_SUB_RTN_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_SUB_RTN_U64 - - class Inst_DS__DS_RSUB_RTN_U64 : public Inst_DS - { - public: - Inst_DS__DS_RSUB_RTN_U64(InFmt_DS*); 
- ~Inst_DS__DS_RSUB_RTN_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_RSUB_RTN_U64 - - class Inst_DS__DS_INC_RTN_U64 : public Inst_DS - { - public: - Inst_DS__DS_INC_RTN_U64(InFmt_DS*); - ~Inst_DS__DS_INC_RTN_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_INC_RTN_U64 - - class Inst_DS__DS_DEC_RTN_U64 : public Inst_DS - { - public: - Inst_DS__DS_DEC_RTN_U64(InFmt_DS*); - ~Inst_DS__DS_DEC_RTN_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_DEC_RTN_U64 - - class Inst_DS__DS_MIN_RTN_I64 : public Inst_DS - { - 
public: - Inst_DS__DS_MIN_RTN_I64(InFmt_DS*); - ~Inst_DS__DS_MIN_RTN_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_RTN_I64 - - class Inst_DS__DS_MAX_RTN_I64 : public Inst_DS - { - public: - Inst_DS__DS_MAX_RTN_I64(InFmt_DS*); - ~Inst_DS__DS_MAX_RTN_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_RTN_I64 - - class Inst_DS__DS_MIN_RTN_U64 : public Inst_DS - { - public: - Inst_DS__DS_MIN_RTN_U64(InFmt_DS*); - ~Inst_DS__DS_MIN_RTN_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_RTN_U64 - - class 
Inst_DS__DS_MAX_RTN_U64 : public Inst_DS - { - public: - Inst_DS__DS_MAX_RTN_U64(InFmt_DS*); - ~Inst_DS__DS_MAX_RTN_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_RTN_U64 - - class Inst_DS__DS_AND_RTN_B64 : public Inst_DS - { - public: - Inst_DS__DS_AND_RTN_B64(InFmt_DS*); - ~Inst_DS__DS_AND_RTN_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_AND_RTN_B64 - - class Inst_DS__DS_OR_RTN_B64 : public Inst_DS - { - public: - Inst_DS__DS_OR_RTN_B64(InFmt_DS*); - ~Inst_DS__DS_OR_RTN_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; 
// Inst_DS__DS_OR_RTN_B64 - - class Inst_DS__DS_XOR_RTN_B64 : public Inst_DS - { - public: - Inst_DS__DS_XOR_RTN_B64(InFmt_DS*); - ~Inst_DS__DS_XOR_RTN_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_XOR_RTN_B64 - - class Inst_DS__DS_MSKOR_RTN_B64 : public Inst_DS - { - public: - Inst_DS__DS_MSKOR_RTN_B64(InFmt_DS*); - ~Inst_DS__DS_MSKOR_RTN_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MSKOR_RTN_B64 - - class Inst_DS__DS_WRXCHG_RTN_B64 : public Inst_DS - { - public: - Inst_DS__DS_WRXCHG_RTN_B64(InFmt_DS*); - ~Inst_DS__DS_WRXCHG_RTN_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // 
getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRXCHG_RTN_B64 - - class Inst_DS__DS_WRXCHG2_RTN_B64 : public Inst_DS - { - public: - Inst_DS__DS_WRXCHG2_RTN_B64(InFmt_DS*); - ~Inst_DS__DS_WRXCHG2_RTN_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRXCHG2_RTN_B64 - - class Inst_DS__DS_WRXCHG2ST64_RTN_B64 : public Inst_DS - { - public: - Inst_DS__DS_WRXCHG2ST64_RTN_B64(InFmt_DS*); - ~Inst_DS__DS_WRXCHG2ST64_RTN_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRXCHG2ST64_RTN_B64 - - class Inst_DS__DS_CMPST_RTN_B64 : public Inst_DS - { - public: - Inst_DS__DS_CMPST_RTN_B64(InFmt_DS*); - ~Inst_DS__DS_CMPST_RTN_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 8; - case 2: 
//vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_CMPST_RTN_B64 - - class Inst_DS__DS_CMPST_RTN_F64 : public Inst_DS - { - public: - Inst_DS__DS_CMPST_RTN_F64(InFmt_DS*); - ~Inst_DS__DS_CMPST_RTN_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d1 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_CMPST_RTN_F64 - - class Inst_DS__DS_MIN_RTN_F64 : public Inst_DS - { - public: - Inst_DS__DS_MIN_RTN_F64(InFmt_DS*); - ~Inst_DS__DS_MIN_RTN_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_RTN_F64 - - class Inst_DS__DS_MAX_RTN_F64 : public Inst_DS - { - public: - Inst_DS__DS_MAX_RTN_F64(InFmt_DS*); - ~Inst_DS__DS_MAX_RTN_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - 
return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_RTN_F64 - - class Inst_DS__DS_READ_B64 : public Inst_DS - { - public: - Inst_DS__DS_READ_B64(InFmt_DS*); - ~Inst_DS__DS_READ_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_READ_B64 - - class Inst_DS__DS_READ2_B64 : public Inst_DS - { - public: - Inst_DS__DS_READ2_B64(InFmt_DS*); - ~Inst_DS__DS_READ2_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_READ2_B64 - - class Inst_DS__DS_READ2ST64_B64 : public Inst_DS - { - public: - Inst_DS__DS_READ2ST64_B64(InFmt_DS*); - ~Inst_DS__DS_READ2ST64_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; 
} - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_READ2ST64_B64 - - class Inst_DS__DS_CONDXCHG32_RTN_B64 : public Inst_DS - { - public: - Inst_DS__DS_CONDXCHG32_RTN_B64(InFmt_DS*); - ~Inst_DS__DS_CONDXCHG32_RTN_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 8; - case 2: //vgpr_rtn - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_CONDXCHG32_RTN_B64 - - class Inst_DS__DS_ADD_SRC2_U32 : public Inst_DS - { - public: - Inst_DS__DS_ADD_SRC2_U32(InFmt_DS*); - ~Inst_DS__DS_ADD_SRC2_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_ADD_SRC2_U32 - - class Inst_DS__DS_SUB_SRC2_U32 : public Inst_DS - { - public: - Inst_DS__DS_SUB_SRC2_U32(InFmt_DS*); - ~Inst_DS__DS_SUB_SRC2_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - 
} // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_SUB_SRC2_U32 - - class Inst_DS__DS_RSUB_SRC2_U32 : public Inst_DS - { - public: - Inst_DS__DS_RSUB_SRC2_U32(InFmt_DS*); - ~Inst_DS__DS_RSUB_SRC2_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_RSUB_SRC2_U32 - - class Inst_DS__DS_INC_SRC2_U32 : public Inst_DS - { - public: - Inst_DS__DS_INC_SRC2_U32(InFmt_DS*); - ~Inst_DS__DS_INC_SRC2_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_INC_SRC2_U32 - - class Inst_DS__DS_DEC_SRC2_U32 : public Inst_DS - { - public: - Inst_DS__DS_DEC_SRC2_U32(InFmt_DS*); - ~Inst_DS__DS_DEC_SRC2_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - 
getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_DEC_SRC2_U32 - - class Inst_DS__DS_MIN_SRC2_I32 : public Inst_DS - { - public: - Inst_DS__DS_MIN_SRC2_I32(InFmt_DS*); - ~Inst_DS__DS_MIN_SRC2_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_SRC2_I32 - - class Inst_DS__DS_MAX_SRC2_I32 : public Inst_DS - { - public: - Inst_DS__DS_MAX_SRC2_I32(InFmt_DS*); - ~Inst_DS__DS_MAX_SRC2_I32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_SRC2_I32 - - class Inst_DS__DS_MIN_SRC2_U32 : public Inst_DS - { - public: - Inst_DS__DS_MIN_SRC2_U32(InFmt_DS*); - ~Inst_DS__DS_MIN_SRC2_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", 
opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_SRC2_U32 - - class Inst_DS__DS_MAX_SRC2_U32 : public Inst_DS - { - public: - Inst_DS__DS_MAX_SRC2_U32(InFmt_DS*); - ~Inst_DS__DS_MAX_SRC2_U32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_SRC2_U32 - - class Inst_DS__DS_AND_SRC2_B32 : public Inst_DS - { - public: - Inst_DS__DS_AND_SRC2_B32(InFmt_DS*); - ~Inst_DS__DS_AND_SRC2_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_AND_SRC2_B32 - - class Inst_DS__DS_OR_SRC2_B32 : public Inst_DS - { - public: - Inst_DS__DS_OR_SRC2_B32(InFmt_DS*); - ~Inst_DS__DS_OR_SRC2_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_OR_SRC2_B32 - - class 
Inst_DS__DS_XOR_SRC2_B32 : public Inst_DS - { - public: - Inst_DS__DS_XOR_SRC2_B32(InFmt_DS*); - ~Inst_DS__DS_XOR_SRC2_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_XOR_SRC2_B32 - - class Inst_DS__DS_WRITE_SRC2_B32 : public Inst_DS - { - public: - Inst_DS__DS_WRITE_SRC2_B32(InFmt_DS*); - ~Inst_DS__DS_WRITE_SRC2_B32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRITE_SRC2_B32 - - class Inst_DS__DS_MIN_SRC2_F32 : public Inst_DS - { - public: - Inst_DS__DS_MIN_SRC2_F32(InFmt_DS*); - ~Inst_DS__DS_MIN_SRC2_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_SRC2_F32 - - class Inst_DS__DS_MAX_SRC2_F32 : public Inst_DS - { - public: - Inst_DS__DS_MAX_SRC2_F32(InFmt_DS*); - ~Inst_DS__DS_MAX_SRC2_F32(); - - int 
- getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_SRC2_F32 - - class Inst_DS__DS_ADD_SRC2_F32 : public Inst_DS - { - public: - Inst_DS__DS_ADD_SRC2_F32(InFmt_DS*); - ~Inst_DS__DS_ADD_SRC2_F32(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_ADD_SRC2_F32 - - class Inst_DS__DS_GWS_SEMA_RELEASE_ALL : public Inst_DS - { - public: - Inst_DS__DS_GWS_SEMA_RELEASE_ALL(InFmt_DS*); - ~Inst_DS__DS_GWS_SEMA_RELEASE_ALL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_GWS_SEMA_RELEASE_ALL - - class Inst_DS__DS_GWS_INIT : public Inst_DS - { - public: - Inst_DS__DS_GWS_INIT(InFmt_DS*); - ~Inst_DS__DS_GWS_INIT(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int 
numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_d0 - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_GWS_INIT - - class Inst_DS__DS_GWS_SEMA_V : public Inst_DS - { - public: - Inst_DS__DS_GWS_SEMA_V(InFmt_DS*); - ~Inst_DS__DS_GWS_SEMA_V(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_GWS_SEMA_V - - class Inst_DS__DS_GWS_SEMA_BR : public Inst_DS - { - public: - Inst_DS__DS_GWS_SEMA_BR(InFmt_DS*); - ~Inst_DS__DS_GWS_SEMA_BR(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_d0 - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_GWS_SEMA_BR - - class Inst_DS__DS_GWS_SEMA_P : public Inst_DS - { - public: - Inst_DS__DS_GWS_SEMA_P(InFmt_DS*); - ~Inst_DS__DS_GWS_SEMA_P(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - 
int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_GWS_SEMA_P - - class Inst_DS__DS_GWS_BARRIER : public Inst_DS - { - public: - Inst_DS__DS_GWS_BARRIER(InFmt_DS*); - ~Inst_DS__DS_GWS_BARRIER(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_d0 - return 4; - case 1: //vgpr_d0 - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_GWS_BARRIER - - class Inst_DS__DS_CONSUME : public Inst_DS - { - public: - Inst_DS__DS_CONSUME(InFmt_DS*); - ~Inst_DS__DS_CONSUME(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_CONSUME - - class Inst_DS__DS_APPEND : public Inst_DS - { - public: - Inst_DS__DS_APPEND(InFmt_DS*); - ~Inst_DS__DS_APPEND(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - 
return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_APPEND - - class Inst_DS__DS_ORDERED_COUNT : public Inst_DS - { - public: - Inst_DS__DS_ORDERED_COUNT(InFmt_DS*); - ~Inst_DS__DS_ORDERED_COUNT(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_ORDERED_COUNT - - class Inst_DS__DS_ADD_SRC2_U64 : public Inst_DS - { - public: - Inst_DS__DS_ADD_SRC2_U64(InFmt_DS*); - ~Inst_DS__DS_ADD_SRC2_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_ADD_SRC2_U64 - - class Inst_DS__DS_SUB_SRC2_U64 : public Inst_DS - { - public: - Inst_DS__DS_SUB_SRC2_U64(InFmt_DS*); - ~Inst_DS__DS_SUB_SRC2_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - 
void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_SUB_SRC2_U64 - - class Inst_DS__DS_RSUB_SRC2_U64 : public Inst_DS - { - public: - Inst_DS__DS_RSUB_SRC2_U64(InFmt_DS*); - ~Inst_DS__DS_RSUB_SRC2_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_RSUB_SRC2_U64 - - class Inst_DS__DS_INC_SRC2_U64 : public Inst_DS - { - public: - Inst_DS__DS_INC_SRC2_U64(InFmt_DS*); - ~Inst_DS__DS_INC_SRC2_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_INC_SRC2_U64 - - class Inst_DS__DS_DEC_SRC2_U64 : public Inst_DS - { - public: - Inst_DS__DS_DEC_SRC2_U64(InFmt_DS*); - ~Inst_DS__DS_DEC_SRC2_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_DEC_SRC2_U64 - - class Inst_DS__DS_MIN_SRC2_I64 : public Inst_DS - { - public: - 
Inst_DS__DS_MIN_SRC2_I64(InFmt_DS*); - ~Inst_DS__DS_MIN_SRC2_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_SRC2_I64 - - class Inst_DS__DS_MAX_SRC2_I64 : public Inst_DS - { - public: - Inst_DS__DS_MAX_SRC2_I64(InFmt_DS*); - ~Inst_DS__DS_MAX_SRC2_I64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_SRC2_I64 - - class Inst_DS__DS_MIN_SRC2_U64 : public Inst_DS - { - public: - Inst_DS__DS_MIN_SRC2_U64(InFmt_DS*); - ~Inst_DS__DS_MIN_SRC2_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_SRC2_U64 - - class Inst_DS__DS_MAX_SRC2_U64 : public Inst_DS - { - public: - Inst_DS__DS_MAX_SRC2_U64(InFmt_DS*); - ~Inst_DS__DS_MAX_SRC2_U64(); - - int - getNumOperands() override - { - return numDstRegOperands() + 
numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_SRC2_U64 - - class Inst_DS__DS_AND_SRC2_B64 : public Inst_DS - { - public: - Inst_DS__DS_AND_SRC2_B64(InFmt_DS*); - ~Inst_DS__DS_AND_SRC2_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_AND_SRC2_B64 - - class Inst_DS__DS_OR_SRC2_B64 : public Inst_DS - { - public: - Inst_DS__DS_OR_SRC2_B64(InFmt_DS*); - ~Inst_DS__DS_OR_SRC2_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_OR_SRC2_B64 - - class Inst_DS__DS_XOR_SRC2_B64 : public Inst_DS - { - public: - Inst_DS__DS_XOR_SRC2_B64(InFmt_DS*); - ~Inst_DS__DS_XOR_SRC2_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - 
int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_XOR_SRC2_B64 - - class Inst_DS__DS_WRITE_SRC2_B64 : public Inst_DS - { - public: - Inst_DS__DS_WRITE_SRC2_B64(InFmt_DS*); - ~Inst_DS__DS_WRITE_SRC2_B64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRITE_SRC2_B64 - - class Inst_DS__DS_MIN_SRC2_F64 : public Inst_DS - { - public: - Inst_DS__DS_MIN_SRC2_F64(InFmt_DS*); - ~Inst_DS__DS_MIN_SRC2_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MIN_SRC2_F64 - - class Inst_DS__DS_MAX_SRC2_F64 : public Inst_DS - { - public: - Inst_DS__DS_MAX_SRC2_F64(InFmt_DS*); - ~Inst_DS__DS_MAX_SRC2_F64(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - default: - fatal("op idx %i out of 
bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_DS__DS_MAX_SRC2_F64 - - class Inst_DS__DS_WRITE_B96 : public Inst_DS - { - public: - Inst_DS__DS_WRITE_B96(InFmt_DS*); - ~Inst_DS__DS_WRITE_B96(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 12; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRITE_B96 - - class Inst_DS__DS_WRITE_B128 : public Inst_DS - { - public: - Inst_DS__DS_WRITE_B128(InFmt_DS*); - ~Inst_DS__DS_WRITE_B128(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_d0 - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_WRITE_B128 - - class Inst_DS__DS_READ_B96 : public Inst_DS - { - public: - Inst_DS__DS_READ_B96(InFmt_DS*); - ~Inst_DS__DS_READ_B96(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { 
- case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - return 12; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_READ_B96 - - class Inst_DS__DS_READ_B128 : public Inst_DS - { - public: - Inst_DS__DS_READ_B128(InFmt_DS*); - ~Inst_DS__DS_READ_B128(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //vgpr_rtn - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_DS__DS_READ_B128 - - class Inst_MUBUF__BUFFER_LOAD_FORMAT_X : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_FORMAT_X - - class Inst_MUBUF__BUFFER_LOAD_FORMAT_XY : public Inst_MUBUF - { - public: - 
Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY - - class Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 12; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ - - class Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - 
- int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW - - class Inst_MUBUF__BUFFER_STORE_FORMAT_X : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_STORE_FORMAT_X(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_s - return 4; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_STORE_FORMAT_X - - class Inst_MUBUF__BUFFER_STORE_FORMAT_XY : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_s - return 8; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void 
execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_STORE_FORMAT_XY - - class Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_s - return 12; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ - - class Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_s - return 16; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW - - class Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X : public Inst_MUBUF - { - public: - 
Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X - - class Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY - - class Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int 
numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 12; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ - - class Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW - - class Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_s - return 4; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of 
bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X - - class Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_s - return 8; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY - - class Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_s - return 12; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ - - class 
Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_s - return 16; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW - - class Inst_MUBUF__BUFFER_LOAD_UBYTE : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_UBYTE(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_d - return 4; - case 1: //vgpr_a - if (instData.OFFEN && instData.IDXEN) { - // if we need an idx and offset from a - // VGPR, we'll read VGPR[VADDR] and - // VGPR[VADDR + 1], otherwise we just - // read VGPR[VADDR] - return 8; - } else { - return 4; - } - case 2: //sgpr_r - return 16; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_UBYTE - - class Inst_MUBUF__BUFFER_LOAD_SBYTE : public Inst_MUBUF - { - public: - 
Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_SBYTE(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_SBYTE - - class Inst_MUBUF__BUFFER_LOAD_USHORT : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_USHORT(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_USHORT - - class Inst_MUBUF__BUFFER_LOAD_SSHORT : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_SSHORT(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) 
override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_SSHORT - - class Inst_MUBUF__BUFFER_LOAD_DWORD : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_DWORD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_DWORD - - class Inst_MUBUF__BUFFER_LOAD_DWORDX2 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_DWORDX2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; 
- void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_DWORDX2 - - class Inst_MUBUF__BUFFER_LOAD_DWORDX3 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_DWORDX3(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 12; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_DWORDX3 - - class Inst_MUBUF__BUFFER_LOAD_DWORDX4 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_LOAD_DWORDX4(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_LOAD_DWORDX4 - - class Inst_MUBUF__BUFFER_STORE_BYTE : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_STORE_BYTE(); - - int - getNumOperands() override - { - return numDstRegOperands() + 
numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_s - return 4; - case 1: //vgpr_a - if (instData.OFFEN && instData.IDXEN) { - // if we need an idx and offset from a - // VGPR, we'll read VGPR[VADDR] and - // VGPR[VADDR + 1], otherwise we just - // read VGPR[VADDR] - return 8; - } else { - return 4; - } - case 2: //sgpr_r - return 16; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_STORE_BYTE - - class Inst_MUBUF__BUFFER_STORE_SHORT : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_STORE_SHORT(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_s - return 4; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 16; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_STORE_SHORT - - class Inst_MUBUF__BUFFER_STORE_DWORD : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_STORE_DWORD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override 
{ return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_s - return 4; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 16; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_STORE_DWORD - - class Inst_MUBUF__BUFFER_STORE_DWORDX2 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_STORE_DWORDX2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_s - return 8; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 16; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_STORE_DWORDX2 - - class Inst_MUBUF__BUFFER_STORE_DWORDX3 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_STORE_DWORDX3(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_s - return 12; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 16; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void 
execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_STORE_DWORDX3 - - class Inst_MUBUF__BUFFER_STORE_DWORDX4 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_STORE_DWORDX4(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_s - return 16; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 16; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_STORE_DWORDX4 - - class Inst_MUBUF__BUFFER_STORE_LDS_DWORD : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //sgpr_r - return 16; - case 1: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_STORE_LDS_DWORD - - class Inst_MUBUF__BUFFER_WBINVL1 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_WBINVL1(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { 
return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_WBINVL1 - - class Inst_MUBUF__BUFFER_WBINVL1_VOL : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_WBINVL1_VOL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 0; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_WBINVL1_VOL - - class Inst_MUBUF__BUFFER_ATOMIC_SWAP : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_SWAP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_SWAP - - class Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF*); - 
~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP - - class Inst_MUBUF__BUFFER_ATOMIC_ADD : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_ADD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_ADD - - class Inst_MUBUF__BUFFER_ATOMIC_SUB : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_SUB(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } 
- } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_SUB - - class Inst_MUBUF__BUFFER_ATOMIC_SMIN : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_SMIN(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_SMIN - - class Inst_MUBUF__BUFFER_ATOMIC_UMIN : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_UMIN(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_UMIN - - class Inst_MUBUF__BUFFER_ATOMIC_SMAX : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_SMAX(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) 
override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_SMAX - - class Inst_MUBUF__BUFFER_ATOMIC_UMAX : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_UMAX(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_UMAX - - class Inst_MUBUF__BUFFER_ATOMIC_AND : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_AND(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_AND - - class Inst_MUBUF__BUFFER_ATOMIC_OR : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_OR(); - - int - getNumOperands() 
override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_OR - - class Inst_MUBUF__BUFFER_ATOMIC_XOR : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_XOR(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_XOR - - class Inst_MUBUF__BUFFER_ATOMIC_INC : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_INC(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - 
}; // Inst_MUBUF__BUFFER_ATOMIC_INC - - class Inst_MUBUF__BUFFER_ATOMIC_DEC : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_DEC(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_DEC - - class Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 - - class Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 
0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 - - class Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 - - class Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 - - class Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(); - - int - getNumOperands() 
override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 - - class Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 - - class Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void 
execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 - - class Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 - - class Inst_MUBUF__BUFFER_ATOMIC_AND_X2 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_AND_X2 - - class Inst_MUBUF__BUFFER_ATOMIC_OR_X2 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) 
override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_OR_X2 - - class Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 - - class Inst_MUBUF__BUFFER_ATOMIC_INC_X2 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF*); - ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_INC_X2 - - class Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 : public Inst_MUBUF - { - public: - Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF*); - 
~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 16; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 - - class Inst_MTBUF__TBUFFER_LOAD_FORMAT_X : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MTBUF__TBUFFER_LOAD_FORMAT_X - - class Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: //sgpr_o 
- return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY - - class Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 12; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ - - class Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // 
Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW - - class Inst_MTBUF__TBUFFER_STORE_FORMAT_X : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_STORE_FORMAT_X(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_STORE_FORMAT_X(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_d - return 32; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MTBUF__TBUFFER_STORE_FORMAT_X - - class Inst_MTBUF__TBUFFER_STORE_FORMAT_XY : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_d - return 8; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MTBUF__TBUFFER_STORE_FORMAT_XY - - class Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + 
numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_d - return 12; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ - - class Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_d - return 16; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW - - class Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: 
//sgpr_o - return 4; - case 3: //vgpr_d - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X - - class Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY - - class Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 12; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) 
override; - }; // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ - - class Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 8; - case 1: //sgpr_r - return 4; - case 2: //sgpr_o - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW - - class Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_d - return 32; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X - - class Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(); - - int - 
getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_d - return 8; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY - - class Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_d - return 12; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ - - class Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW : public Inst_MTBUF - { - public: - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(InFmt_MTBUF*); - ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - 
switch (opIdx) { - case 0: //vgpr_d - return 16; - case 1: //vgpr_a - return 8; - case 2: //sgpr_r - return 4; - case 3: //sgpr_o - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW - - class Inst_MIMG__IMAGE_LOAD : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_LOAD(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_LOAD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 16; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_LOAD - - class Inst_MIMG__IMAGE_LOAD_MIP : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_LOAD_MIP(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_LOAD_MIP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 16; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_LOAD_MIP - - class Inst_MIMG__IMAGE_LOAD_PCK : 
public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_LOAD_PCK(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_LOAD_PCK(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 16; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_LOAD_PCK - - class Inst_MIMG__IMAGE_LOAD_PCK_SGN : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_LOAD_PCK_SGN(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_LOAD_PCK_SGN(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 16; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_LOAD_PCK_SGN - - class Inst_MIMG__IMAGE_LOAD_MIP_PCK : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_LOAD_MIP_PCK(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_LOAD_MIP_PCK(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - 
return 16; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_LOAD_MIP_PCK - - class Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 16; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN - - class Inst_MIMG__IMAGE_STORE : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_STORE(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_STORE(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_d - return 16; - case 1: //vgpr_a - return 16; - case 2: //sgpr_r - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_STORE - - class Inst_MIMG__IMAGE_STORE_MIP : public Inst_MIMG - { - public: - 
Inst_MIMG__IMAGE_STORE_MIP(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_STORE_MIP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_d - return 16; - case 1: //vgpr_a - return 16; - case 2: //sgpr_r - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_STORE_MIP - - class Inst_MIMG__IMAGE_STORE_PCK : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_STORE_PCK(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_STORE_PCK(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_d - return 16; - case 1: //vgpr_a - return 16; - case 2: //sgpr_r - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_STORE_PCK - - class Inst_MIMG__IMAGE_STORE_MIP_PCK : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_STORE_MIP_PCK(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_STORE_MIP_PCK(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_d - return 16; - case 1: //vgpr_a - return 16; 
- case 2: //sgpr_r - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_STORE_MIP_PCK - - class Inst_MIMG__IMAGE_GET_RESINFO : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GET_RESINFO(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GET_RESINFO(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 16; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GET_RESINFO - - class Inst_MIMG__IMAGE_ATOMIC_SWAP : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_ATOMIC_SWAP(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_ATOMIC_SWAP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 32; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_ATOMIC_SWAP - - class Inst_MIMG__IMAGE_ATOMIC_CMPSWAP : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_ATOMIC_CMPSWAP(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int 
numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 32; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_ATOMIC_CMPSWAP - - class Inst_MIMG__IMAGE_ATOMIC_ADD : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_ATOMIC_ADD(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_ATOMIC_ADD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 32; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_ATOMIC_ADD - - class Inst_MIMG__IMAGE_ATOMIC_SUB : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_ATOMIC_SUB(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_ATOMIC_SUB(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 32; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_ATOMIC_SUB - - class Inst_MIMG__IMAGE_ATOMIC_SMIN : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_ATOMIC_SMIN(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_ATOMIC_SMIN(); - - int - 
getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 32; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_ATOMIC_SMIN - - class Inst_MIMG__IMAGE_ATOMIC_UMIN : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_ATOMIC_UMIN(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_ATOMIC_UMIN(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 32; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_ATOMIC_UMIN - - class Inst_MIMG__IMAGE_ATOMIC_SMAX : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_ATOMIC_SMAX(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_ATOMIC_SMAX(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 32; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_ATOMIC_SMAX - - class Inst_MIMG__IMAGE_ATOMIC_UMAX : public 
Inst_MIMG - { - public: - Inst_MIMG__IMAGE_ATOMIC_UMAX(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_ATOMIC_UMAX(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 32; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_ATOMIC_UMAX - - class Inst_MIMG__IMAGE_ATOMIC_AND : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_ATOMIC_AND(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_ATOMIC_AND(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 32; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_ATOMIC_AND - - class Inst_MIMG__IMAGE_ATOMIC_OR : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_ATOMIC_OR(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_ATOMIC_OR(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 32; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void 
execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_ATOMIC_OR - - class Inst_MIMG__IMAGE_ATOMIC_XOR : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_ATOMIC_XOR(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_ATOMIC_XOR(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 32; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_ATOMIC_XOR - - class Inst_MIMG__IMAGE_ATOMIC_INC : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_ATOMIC_INC(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_ATOMIC_INC(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 32; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_ATOMIC_INC - - class Inst_MIMG__IMAGE_ATOMIC_DEC : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_ATOMIC_DEC(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_ATOMIC_DEC(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 32; - case 1: //sgpr_r - return 32; - case 2: //vgpr_d - return 16; - 
default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_ATOMIC_DEC - - class Inst_MIMG__IMAGE_SAMPLE : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE - - class Inst_MIMG__IMAGE_SAMPLE_CL : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_CL(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_CL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_CL - - class Inst_MIMG__IMAGE_SAMPLE_D : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_D(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_D(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int 
opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_D - - class Inst_MIMG__IMAGE_SAMPLE_D_CL : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_D_CL(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_D_CL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_D_CL - - class Inst_MIMG__IMAGE_SAMPLE_L : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_L(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_L(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_L - - class Inst_MIMG__IMAGE_SAMPLE_B : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_B(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_B(); - - int - getNumOperands() override - { - return 
numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_B - - class Inst_MIMG__IMAGE_SAMPLE_B_CL : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_B_CL(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_B_CL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_B_CL - - class Inst_MIMG__IMAGE_SAMPLE_LZ : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_LZ(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_LZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_LZ - - class 
Inst_MIMG__IMAGE_SAMPLE_C : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C - - class Inst_MIMG__IMAGE_SAMPLE_C_CL : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_CL(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_CL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_CL - - class Inst_MIMG__IMAGE_SAMPLE_C_D : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_D(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_D(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 
16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_D - - class Inst_MIMG__IMAGE_SAMPLE_C_D_CL : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_D_CL(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_D_CL - - class Inst_MIMG__IMAGE_SAMPLE_C_L : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_L(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_L(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_L - - class Inst_MIMG__IMAGE_SAMPLE_C_B : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_B(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_B(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { 
return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_B - - class Inst_MIMG__IMAGE_SAMPLE_C_B_CL : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_B_CL(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_B_CL - - class Inst_MIMG__IMAGE_SAMPLE_C_LZ : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_LZ(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_LZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_LZ - - class Inst_MIMG__IMAGE_SAMPLE_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_O(); - 
- int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_O - - class Inst_MIMG__IMAGE_SAMPLE_CL_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_CL_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_CL_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_CL_O - - class Inst_MIMG__IMAGE_SAMPLE_D_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_D_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_D_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) 
override; - }; // Inst_MIMG__IMAGE_SAMPLE_D_O - - class Inst_MIMG__IMAGE_SAMPLE_D_CL_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_D_CL_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_D_CL_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_D_CL_O - - class Inst_MIMG__IMAGE_SAMPLE_L_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_L_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_L_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_L_O - - class Inst_MIMG__IMAGE_SAMPLE_B_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_B_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_B_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r 
- return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_B_O - - class Inst_MIMG__IMAGE_SAMPLE_B_CL_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_B_CL_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_B_CL_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_B_CL_O - - class Inst_MIMG__IMAGE_SAMPLE_LZ_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_LZ_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_LZ_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_LZ_O - - class Inst_MIMG__IMAGE_SAMPLE_C_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int 
numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_O - - class Inst_MIMG__IMAGE_SAMPLE_C_CL_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_CL_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_CL_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_CL_O - - class Inst_MIMG__IMAGE_SAMPLE_C_D_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_D_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_D_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_D_O - - class Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O : public 
Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O - - class Inst_MIMG__IMAGE_SAMPLE_C_L_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_L_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_L_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_L_O - - class Inst_MIMG__IMAGE_SAMPLE_C_B_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_B_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_B_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; 
- default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_B_O - - class Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O - - class Inst_MIMG__IMAGE_SAMPLE_C_LZ_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_LZ_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_LZ_O - - class Inst_MIMG__IMAGE_GATHER4 : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() 
override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4 - - class Inst_MIMG__IMAGE_GATHER4_CL : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_CL(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_CL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_CL - - class Inst_MIMG__IMAGE_GATHER4_L : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_L(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_L(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_L - - class Inst_MIMG__IMAGE_GATHER4_B : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_B(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_B(); - - int - 
getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_B - - class Inst_MIMG__IMAGE_GATHER4_B_CL : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_B_CL(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_B_CL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_B_CL - - class Inst_MIMG__IMAGE_GATHER4_LZ : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_LZ(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_LZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) 
override; - }; // Inst_MIMG__IMAGE_GATHER4_LZ - - class Inst_MIMG__IMAGE_GATHER4_C : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_C(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_C(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_C - - class Inst_MIMG__IMAGE_GATHER4_C_CL : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_C_CL(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_C_CL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_C_CL - - class Inst_MIMG__IMAGE_GATHER4_C_L : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_C_L(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_C_L(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - 
return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_C_L - - class Inst_MIMG__IMAGE_GATHER4_C_B : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_C_B(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_C_B(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_C_B - - class Inst_MIMG__IMAGE_GATHER4_C_B_CL : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_C_B_CL(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_C_B_CL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_C_B_CL - - class Inst_MIMG__IMAGE_GATHER4_C_LZ : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_C_LZ(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_C_LZ(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - 
- int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_C_LZ - - class Inst_MIMG__IMAGE_GATHER4_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_O - - class Inst_MIMG__IMAGE_GATHER4_CL_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_CL_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_CL_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_CL_O - - class Inst_MIMG__IMAGE_GATHER4_L_O : public Inst_MIMG - { - 
public: - Inst_MIMG__IMAGE_GATHER4_L_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_L_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_L_O - - class Inst_MIMG__IMAGE_GATHER4_B_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_B_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_B_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_B_O - - class Inst_MIMG__IMAGE_GATHER4_B_CL_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_B_CL_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_B_CL_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx 
%i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_B_CL_O - - class Inst_MIMG__IMAGE_GATHER4_LZ_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_LZ_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_LZ_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_LZ_O - - class Inst_MIMG__IMAGE_GATHER4_C_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_C_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_C_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_C_O - - class Inst_MIMG__IMAGE_GATHER4_C_CL_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_C_CL_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_C_CL_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - 
- int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_C_CL_O - - class Inst_MIMG__IMAGE_GATHER4_C_L_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_C_L_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_C_L_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_C_L_O - - class Inst_MIMG__IMAGE_GATHER4_C_B_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_C_B_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_C_B_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_C_B_O - - class Inst_MIMG__IMAGE_GATHER4_C_B_CL_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_C_B_CL_O(InFmt_MIMG*); - 
~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_C_B_CL_O - - class Inst_MIMG__IMAGE_GATHER4_C_LZ_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GATHER4_C_LZ_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GATHER4_C_LZ_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GATHER4_C_LZ_O - - class Inst_MIMG__IMAGE_GET_LOD : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_GET_LOD(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_GET_LOD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // 
getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_GET_LOD - - class Inst_MIMG__IMAGE_SAMPLE_CD : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_CD(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_CD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_CD - - class Inst_MIMG__IMAGE_SAMPLE_CD_CL : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_CD_CL(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_CD_CL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_CD_CL - - class Inst_MIMG__IMAGE_SAMPLE_C_CD : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_CD(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_CD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: 
//vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_CD - - class Inst_MIMG__IMAGE_SAMPLE_C_CD_CL : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_CD_CL(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL - - class Inst_MIMG__IMAGE_SAMPLE_CD_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_CD_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_CD_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_CD_O - - class Inst_MIMG__IMAGE_SAMPLE_CD_CL_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_CD_CL_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O(); - - int - getNumOperands() override - { - return numDstRegOperands() 
+ numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_CD_CL_O - - class Inst_MIMG__IMAGE_SAMPLE_C_CD_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_CD_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_CD_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_MIMG__IMAGE_SAMPLE_C_CD_O - - class Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O : public Inst_MIMG - { - public: - Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O(InFmt_MIMG*); - ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 3; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_a - return 4; - case 1: //sgpr_r - return 32; - case 2: //sgpr_s - return 4; - case 3: //vgpr_d - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // 
Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O - - class Inst_EXP__EXP : public Inst_EXP - { - public: - Inst_EXP__EXP(InFmt_EXP*); - ~Inst_EXP__EXP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 4; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: // - return 32; - case 1: // - return 32; - case 2: // - return 32; - case 3: // - return 32; - case 4: // - return 32; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_EXP__EXP - - class Inst_FLAT__FLAT_LOAD_UBYTE : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_LOAD_UBYTE(InFmt_FLAT*); - ~Inst_FLAT__FLAT_LOAD_UBYTE(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_dst - return 1; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_LOAD_UBYTE - - class Inst_FLAT__FLAT_LOAD_SBYTE : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_LOAD_SBYTE(InFmt_FLAT*); - ~Inst_FLAT__FLAT_LOAD_SBYTE(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_dst - return 1; - default: - fatal("op idx %i out of 
bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_LOAD_SBYTE - - class Inst_FLAT__FLAT_LOAD_USHORT : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT*); - ~Inst_FLAT__FLAT_LOAD_USHORT(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_dst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_LOAD_USHORT - - class Inst_FLAT__FLAT_LOAD_SSHORT : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_LOAD_SSHORT(InFmt_FLAT*); - ~Inst_FLAT__FLAT_LOAD_SSHORT(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_dst - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_LOAD_SSHORT - - class Inst_FLAT__FLAT_LOAD_DWORD : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_LOAD_DWORD(InFmt_FLAT*); - ~Inst_FLAT__FLAT_LOAD_DWORD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // 
getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_LOAD_DWORD - - class Inst_FLAT__FLAT_LOAD_DWORDX2 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_LOAD_DWORDX2(InFmt_FLAT*); - ~Inst_FLAT__FLAT_LOAD_DWORDX2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_LOAD_DWORDX2 - - class Inst_FLAT__FLAT_LOAD_DWORDX3 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_LOAD_DWORDX3(InFmt_FLAT*); - ~Inst_FLAT__FLAT_LOAD_DWORDX3(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_dst - return 12; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; 
- }; // Inst_FLAT__FLAT_LOAD_DWORDX3 - - class Inst_FLAT__FLAT_LOAD_DWORDX4 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_LOAD_DWORDX4(InFmt_FLAT*); - ~Inst_FLAT__FLAT_LOAD_DWORDX4(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 1; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_dst - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_LOAD_DWORDX4 - - class Inst_FLAT__FLAT_STORE_BYTE : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_STORE_BYTE(InFmt_FLAT*); - ~Inst_FLAT__FLAT_STORE_BYTE(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 1; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_STORE_BYTE - - class Inst_FLAT__FLAT_STORE_SHORT : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_STORE_SHORT(InFmt_FLAT*); - ~Inst_FLAT__FLAT_STORE_SHORT(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: 
//vgpr_addr - return 8; - case 1: //vgpr_src - return 2; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_STORE_SHORT - - class Inst_FLAT__FLAT_STORE_DWORD : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_STORE_DWORD(InFmt_FLAT*); - ~Inst_FLAT__FLAT_STORE_DWORD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_STORE_DWORD - - class Inst_FLAT__FLAT_STORE_DWORDX2 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_STORE_DWORDX2(InFmt_FLAT*); - ~Inst_FLAT__FLAT_STORE_DWORDX2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_STORE_DWORDX2 - - class Inst_FLAT__FLAT_STORE_DWORDX3 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_STORE_DWORDX3(InFmt_FLAT*); - ~Inst_FLAT__FLAT_STORE_DWORDX3(); 
- - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 12; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_STORE_DWORDX3 - - class Inst_FLAT__FLAT_STORE_DWORDX4 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_STORE_DWORDX4(InFmt_FLAT*); - ~Inst_FLAT__FLAT_STORE_DWORDX4(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 16; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_STORE_DWORDX4 - - class Inst_FLAT__FLAT_ATOMIC_SWAP : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_SWAP(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_SWAP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 4; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // 
getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_SWAP - - class Inst_FLAT__FLAT_ATOMIC_CMPSWAP : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 8; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_CMPSWAP - - class Inst_FLAT__FLAT_ATOMIC_ADD : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_ADD(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_ADD(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 4; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_ADD - - class Inst_FLAT__FLAT_ATOMIC_SUB : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_SUB(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_SUB(); - - int - getNumOperands() override - { - return numDstRegOperands() + 
numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 4; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_SUB - - class Inst_FLAT__FLAT_ATOMIC_SMIN : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_SMIN(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_SMIN(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 4; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_SMIN - - class Inst_FLAT__FLAT_ATOMIC_UMIN : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_UMIN(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 4; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_UMIN - - class 
Inst_FLAT__FLAT_ATOMIC_SMAX : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_SMAX(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_SMAX(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 4; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_SMAX - - class Inst_FLAT__FLAT_ATOMIC_UMAX : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_UMAX(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 4; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_UMAX - - class Inst_FLAT__FLAT_ATOMIC_AND : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_AND(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_AND(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 4; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } 
- } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_AND - - class Inst_FLAT__FLAT_ATOMIC_OR : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_OR(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_OR(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 4; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_OR - - class Inst_FLAT__FLAT_ATOMIC_XOR : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_XOR(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 4; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_XOR - - class Inst_FLAT__FLAT_ATOMIC_INC : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_INC(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_INC(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 4; - case 2: 
//vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_INC - - class Inst_FLAT__FLAT_ATOMIC_DEC : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_DEC(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_DEC(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 4; - case 2: //vgpr_dst - return 4; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_DEC - - class Inst_FLAT__FLAT_ATOMIC_SWAP_X2 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_SWAP_X2(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_SWAP_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 8; - case 2: //vgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_SWAP_X2 - - class Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2(); - - int - getNumOperands() override - { - 
return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 16; - case 2: //vgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 - - class Inst_FLAT__FLAT_ATOMIC_ADD_X2 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_ADD_X2(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_ADD_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 8; - case 2: //vgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_ADD_X2 - - class Inst_FLAT__FLAT_ATOMIC_SUB_X2 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_SUB_X2(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_SUB_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 8; - case 2: //vgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", 
opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_SUB_X2 - - class Inst_FLAT__FLAT_ATOMIC_SMIN_X2 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_SMIN_X2(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_SMIN_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 8; - case 2: //vgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_SMIN_X2 - - class Inst_FLAT__FLAT_ATOMIC_UMIN_X2 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_UMIN_X2(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_UMIN_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 8; - case 2: //vgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_UMIN_X2 - - class Inst_FLAT__FLAT_ATOMIC_SMAX_X2 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_SMAX_X2(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_SMAX_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() 
override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 8; - case 2: //vgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_SMAX_X2 - - class Inst_FLAT__FLAT_ATOMIC_UMAX_X2 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_UMAX_X2(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_UMAX_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 8; - case 2: //vgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_UMAX_X2 - - class Inst_FLAT__FLAT_ATOMIC_AND_X2 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_AND_X2(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_AND_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 8; - case 2: //vgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_AND_X2 - - class Inst_FLAT__FLAT_ATOMIC_OR_X2 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_OR_X2(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_OR_X2(); - - int - getNumOperands() override - { - return 
numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 8; - case 2: //vgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_OR_X2 - - class Inst_FLAT__FLAT_ATOMIC_XOR_X2 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_XOR_X2(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_XOR_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 8; - case 2: //vgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_XOR_X2 - - class Inst_FLAT__FLAT_ATOMIC_INC_X2 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_INC_X2(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_INC_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 8; - case 2: //vgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // 
Inst_FLAT__FLAT_ATOMIC_INC_X2 - - class Inst_FLAT__FLAT_ATOMIC_DEC_X2 : public Inst_FLAT - { - public: - Inst_FLAT__FLAT_ATOMIC_DEC_X2(InFmt_FLAT*); - ~Inst_FLAT__FLAT_ATOMIC_DEC_X2(); - - int - getNumOperands() override - { - return numDstRegOperands() + numSrcRegOperands(); - } // getNumOperands - - int numDstRegOperands() override { return 1; } - int numSrcRegOperands() override { return 2; } - - int - getOperandSize(int opIdx) override - { - switch (opIdx) { - case 0: //vgpr_addr - return 8; - case 1: //vgpr_src - return 8; - case 2: //vgpr_dst - return 8; - default: - fatal("op idx %i out of bounds\n", opIdx); - return -1; - } - } // getOperandSize - - void execute(GPUDynInstPtr) override; - void initiateAcc(GPUDynInstPtr) override; - void completeAcc(GPUDynInstPtr) override; - }; // Inst_FLAT__FLAT_ATOMIC_DEC_X2 -} // namespace Gcn3ISA -} // namespace gem5 - -#endif // __ARCH_GCN3_INSTS_INSTRUCTIONS_HH__ diff --git a/src/arch/amdgpu/gcn3/insts/op_encodings.cc b/src/arch/amdgpu/gcn3/insts/op_encodings.cc deleted file mode 100644 index 41b4feefc1..0000000000 --- a/src/arch/amdgpu/gcn3/insts/op_encodings.cc +++ /dev/null @@ -1,1592 +0,0 @@ -/* - * Copyright (c) 2016-2021 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "arch/amdgpu/gcn3/insts/op_encodings.hh" - -#include - -namespace gem5 -{ - -namespace Gcn3ISA -{ - // --- Inst_SOP2 base class methods --- - - Inst_SOP2::Inst_SOP2(InFmt_SOP2 *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - setFlag(Scalar); - - // copy first instruction DWORD - instData = iFmt[0]; - if (hasSecondDword(iFmt)) { - // copy second instruction DWORD into union - extData = ((MachInst)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - varSize = 4 + 4; - } else { - varSize = 4; - } // if - } // Inst_SOP2 - - void - Inst_SOP2::initOperandInfo() - { - int opNum = 0; - - // Needed because can't take addr of bitfield - int reg = instData.SSRC0; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(instData.SSRC0), false, false); - opNum++; - - reg = instData.SSRC1; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(instData.SSRC1), false, false); - opNum++; - - reg = instData.SDST; - dstOps.emplace_back(reg, getOperandSize(opNum), false, - isScalarReg(instData.SDST), false, false); - - assert(srcOps.size() == numSrcRegOperands()); - assert(dstOps.size() == numDstRegOperands()); - } - - int - 
Inst_SOP2::instSize() const - { - return varSize; - } // instSize - - bool - Inst_SOP2::hasSecondDword(InFmt_SOP2 *iFmt) - { - if (iFmt->SSRC0 == REG_SRC_LITERAL) - return true; - - if (iFmt->SSRC1 == REG_SRC_LITERAL) - return true; - - return false; - } - - void - Inst_SOP2::generateDisassembly() - { - std::stringstream dis_stream; - dis_stream << _opcode << " "; - dis_stream << opSelectorToRegSym(instData.SDST) << ", "; - - if (instData.SSRC0 == REG_SRC_LITERAL) { - dis_stream << "0x" << std::hex << std::setfill('0') << std::setw(8) - << _srcLiteral << ", "; - } else { - dis_stream << opSelectorToRegSym(instData.SSRC0) << ", "; - } - - if (instData.SSRC1 == REG_SRC_LITERAL) { - dis_stream << "0x" << std::hex << std::setfill('0') << std::setw(8) - << _srcLiteral; - } else { - dis_stream << opSelectorToRegSym(instData.SSRC1); - } - - disassembly = dis_stream.str(); - } - - // --- Inst_SOPK base class methods --- - - Inst_SOPK::Inst_SOPK(InFmt_SOPK *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - setFlag(Scalar); - - // copy first instruction DWORD - instData = iFmt[0]; - if (hasSecondDword(iFmt)) { - // copy second instruction DWORD into union - extData = ((MachInst)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - varSize = 4 + 4; - } else { - varSize = 4; - } // if - } // Inst_SOPK - - Inst_SOPK::~Inst_SOPK() - { - } // ~Inst_SOPK - - void - Inst_SOPK::initOperandInfo() - { - int opNum = 0; - - // Needed because can't take addr of bitfield - int reg = instData.SDST; - if (numSrcRegOperands() == getNumOperands()) { - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(reg), false, false); - opNum++; - } - - reg = instData.SIMM16; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - false, false, true); - opNum++; - - if (numDstRegOperands()){ - reg = instData.SDST; - dstOps.emplace_back(reg, getOperandSize(opNum), false, - isScalarReg(reg), false, false); - } - - assert(srcOps.size() == numSrcRegOperands()); - 
assert(dstOps.size() == numDstRegOperands()); - } - - int - Inst_SOPK::instSize() const - { - return varSize; - } // instSize - - bool - Inst_SOPK::hasSecondDword(InFmt_SOPK *iFmt) - { - /* - SOPK can be a 64-bit instruction, i.e., have a second dword: - S_SETREG_IMM32_B32 writes some or all of the LSBs of a 32-bit - literal constant into a hardware register; - the way to detect such special case is to explicitly check the - opcode (20/0x14) - */ - if (iFmt->OP == 0x14) - return true; - - return false; - } - - - void - Inst_SOPK::generateDisassembly() - { - std::stringstream dis_stream; - dis_stream << _opcode << " "; - - // S_SETREG_IMM32_B32 is a 64-bit instruction, using a - // 32-bit literal constant - if (instData.OP == 0x14) { - dis_stream << "0x" << std::hex << std::setfill('0') - << std::setw(8) << extData.imm_u32 << ", "; - } else { - dis_stream << opSelectorToRegSym(instData.SDST) << ", "; - } - - dis_stream << "0x" << std::hex << std::setfill('0') << std::setw(4) - << instData.SIMM16; - - disassembly = dis_stream.str(); - } - - // --- Inst_SOP1 base class methods --- - - Inst_SOP1::Inst_SOP1(InFmt_SOP1 *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - setFlag(Scalar); - - // copy first instruction DWORD - instData = iFmt[0]; - if (hasSecondDword(iFmt)) { - // copy second instruction DWORD into union - extData = ((MachInst)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - varSize = 4 + 4; - } else { - varSize = 4; - } // if - } // Inst_SOP1 - - Inst_SOP1::~Inst_SOP1() - { - } // ~Inst_SOP1 - - void - Inst_SOP1::initOperandInfo() - { - int opNum = 0; - - // Needed because can't take addr of bitfield - int reg = instData.SSRC0; - if (instData.OP != 0x1C) { - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(instData.SSRC0), false, false); - opNum++; - } - - reg = instData.SDST; - dstOps.emplace_back(reg, getOperandSize(opNum), false, - isScalarReg(instData.SDST), false, false); - - assert(srcOps.size() == 
numSrcRegOperands()); - assert(dstOps.size() == numDstRegOperands()); - } - - int - Inst_SOP1::instSize() const - { - return varSize; - } // instSize - - bool - Inst_SOP1::hasSecondDword(InFmt_SOP1 *iFmt) - { - if (iFmt->SSRC0 == REG_SRC_LITERAL) - return true; - - return false; - } - - void - Inst_SOP1::generateDisassembly() - { - std::stringstream dis_stream; - dis_stream << _opcode << " "; - dis_stream << opSelectorToRegSym(instData.SDST) << ", "; - - if (instData.SSRC0 == REG_SRC_LITERAL) { - dis_stream << "0x" << std::hex << std::setfill('0') << std::setw(8) - << extData.imm_u32; - } else { - dis_stream << opSelectorToRegSym(instData.SSRC0); - } - - disassembly = dis_stream.str(); - } - - // --- Inst_SOPC base class methods --- - - Inst_SOPC::Inst_SOPC(InFmt_SOPC *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - setFlag(Scalar); - - // copy first instruction DWORD - instData = iFmt[0]; - if (hasSecondDword(iFmt)) { - // copy second instruction DWORD into union - extData = ((MachInst)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - varSize = 4 + 4; - } else { - varSize = 4; - } // if - } // Inst_SOPC - - Inst_SOPC::~Inst_SOPC() - { - } // ~Inst_SOPC - - void - Inst_SOPC::initOperandInfo() - { - int opNum = 0; - - // Needed because can't take addr of bitfield - int reg = instData.SSRC0; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(instData.SSRC0), false, false); - opNum++; - - reg = instData.SSRC1; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(instData.SSRC1), false, false); - - } - - int - Inst_SOPC::instSize() const - { - return varSize; - } // instSize - - bool - Inst_SOPC::hasSecondDword(InFmt_SOPC *iFmt) - { - if (iFmt->SSRC0 == REG_SRC_LITERAL) - return true; - - if (iFmt->SSRC1 == REG_SRC_LITERAL) - return true; - - return false; - } - - void - Inst_SOPC::generateDisassembly() - { - std::stringstream dis_stream; - dis_stream << _opcode << " "; - - if (instData.SSRC0 == 
REG_SRC_LITERAL) { - dis_stream << "0x" << std::hex << std::setfill('0') << std::setw(8) - << extData.imm_u32; - } else { - dis_stream << opSelectorToRegSym(instData.SSRC0) << ", "; - } - - if (instData.SSRC1 == REG_SRC_LITERAL) { - dis_stream << "0x" << std::hex << std::setfill('0') << std::setw(8) - << extData.imm_u32; - } else { - dis_stream << opSelectorToRegSym(instData.SSRC1); - } - - disassembly = dis_stream.str(); - } - - // --- Inst_SOPP base class methods --- - - Inst_SOPP::Inst_SOPP(InFmt_SOPP *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - setFlag(Scalar); - - // copy first instruction DWORD - instData = iFmt[0]; - } // Inst_SOPP - - Inst_SOPP::~Inst_SOPP() - { - } // ~Inst_SOPP - - void - Inst_SOPP::initOperandInfo() - { - int opNum = 0; - - - if (numSrcRegOperands()) { - // Needed because can't take addr of bitfield - int reg = instData.SIMM16; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - false, false, true); - - opNum++; - - if (readsVCC()) { - srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true, - true, false, false); - opNum++; - } - } - assert(srcOps.size() == numSrcRegOperands()); - assert(dstOps.size() == numDstRegOperands()); - } - - int - Inst_SOPP::instSize() const - { - return 4; - } // instSize - - void - Inst_SOPP::generateDisassembly() - { - std::stringstream dis_stream; - dis_stream << _opcode; - - switch (instData.OP) { - case 8: - { - dis_stream << " "; - int dest = 4 * instData.SIMM16 + 4; - dis_stream << "label_" << std::hex << dest; - } - break; - case 12: - { - dis_stream << " "; - - int vm_cnt = 0; - int exp_cnt = 0; - int lgkm_cnt = 0; - - vm_cnt = bits(instData.SIMM16, 3, 0); - exp_cnt = bits(instData.SIMM16, 6, 4); - lgkm_cnt = bits(instData.SIMM16, 11, 8); - - // if the counts are not maxed out, then we - // print out the count value - if (vm_cnt != 0xf) { - dis_stream << "vmcnt(" << vm_cnt << ")"; - } - - if (lgkm_cnt != 0xf) { - if (vm_cnt != 0xf) - dis_stream << " & "; - - 
dis_stream << "lgkmcnt(" << lgkm_cnt << ")"; - } - - if (exp_cnt != 0x7) { - if (vm_cnt != 0xf || lgkm_cnt != 0xf) - dis_stream << " & "; - - dis_stream << "expcnt(" << exp_cnt << ")"; - } - } - break; - default: - break; - } - - disassembly = dis_stream.str(); - } - - // --- Inst_SMEM base class methods --- - - Inst_SMEM::Inst_SMEM(InFmt_SMEM *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - setFlag(Scalar); - setFlag(GlobalSegment); - - // copy first instruction DWORD - instData = iFmt[0]; - // copy second instruction DWORD - extData = ((InFmt_SMEM_1 *)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - - if (instData.GLC) - setFlag(GloballyCoherent); - } // Inst_SMEM - - Inst_SMEM::~Inst_SMEM() - { - } // ~Inst_SMEM - - void - Inst_SMEM::initOperandInfo() - { - // Formats: - // 0 src + 0 dst - // 3 src + 0 dst - // 2 src + 1 dst - // 0 src + 1 dst - int opNum = 0; - // Needed because can't take addr of bitfield - int reg = 0; - - if (numSrcRegOperands()) { - reg = instData.SDATA; - if (numSrcRegOperands() == getNumOperands()) { - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(reg), false, false); - opNum++; - } - - reg = instData.SBASE; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - true, false, false); - opNum++; - - reg = extData.OFFSET; - if (instData.IMM) { - srcOps.emplace_back(reg, getOperandSize(opNum), true, - false, false, true); - } else { - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(reg), false, false); - } - opNum++; - } - - if (numDstRegOperands()) { - reg = instData.SDATA; - dstOps.emplace_back(reg, getOperandSize(opNum), false, - isScalarReg(reg), false, false); - } - - assert(srcOps.size() == numSrcRegOperands()); - assert(dstOps.size() == numDstRegOperands()); - } - - int - Inst_SMEM::instSize() const - { - return 8; - } // instSize - - void - Inst_SMEM::generateDisassembly() - { - std::stringstream dis_stream; - dis_stream << _opcode << " "; - if 
(numDstRegOperands()) { - if (getOperandSize(getNumOperands() - 1) > 4) { - dis_stream << "s[" << instData.SDATA << ":" - << instData.SDATA + getOperandSize(getNumOperands() - 1) / - 4 - 1 << "], "; - } else { - dis_stream << "s" << instData.SDATA << ", "; - } - } - - // SBASE has an implied LSB of 0, so we need - // to shift by one to get the actual value - dis_stream << "s[" << (instData.SBASE << 1) << ":" - << ((instData.SBASE << 1) + 1) << "], "; - - if (instData.IMM) { - // IMM == 1 implies OFFSET should be - // used as the offset - dis_stream << "0x" << std::hex << std::setfill('0') << std::setw(2) - << extData.OFFSET; - } else { - // IMM == 0 implies OFFSET should be - // used to specify SGRP in which the - // offset is held - dis_stream << "s" << extData.OFFSET; - } - - disassembly = dis_stream.str(); - } - - // --- Inst_VOP2 base class methods --- - - Inst_VOP2::Inst_VOP2(InFmt_VOP2 *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - // copy first instruction DWORD - instData = iFmt[0]; - if (hasSecondDword(iFmt)) { - // copy second instruction DWORD into union - extData = ((MachInst)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - varSize = 4 + 4; - if (iFmt->SRC0 == REG_SRC_DPP) { - setFlag(IsDPP); - } else if (iFmt->SRC0 == REG_SRC_SWDA) { - setFlag(IsSDWA); - } - } else { - varSize = 4; - } // if - } // Inst_VOP2 - - Inst_VOP2::~Inst_VOP2() - { - } // ~Inst_VOP2 - - void - Inst_VOP2::initOperandInfo() - { - int opNum = 0; - - // Needed because can't take addr of bitfield - int reg = instData.SRC0; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(reg), isVectorReg(reg), false); - opNum++; - - reg = instData.VSRC1; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - false, true, false); - opNum++; - - // VCC read - if (readsVCC()) { - srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true, - true, false, false); - opNum++; - } - - // VDST - reg = instData.VDST; - dstOps.emplace_back(reg, 
getOperandSize(opNum), false, - false, true, false); - opNum++; - - // VCC write - if (writesVCC()) { - dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false, - true, false, false); - } - - assert(srcOps.size() == numSrcRegOperands()); - assert(dstOps.size() == numDstRegOperands()); - } - - int - Inst_VOP2::instSize() const - { - return varSize; - } // instSize - - bool - Inst_VOP2::hasSecondDword(InFmt_VOP2 *iFmt) - { - /* - There are a few cases where VOP2 instructions have a second dword: - - 1. SRC0 is a literal - 2. SRC0 is being used to add a data parallel primitive (DPP) - operation to the instruction. - 3. SRC0 is being used for sub d-word addressing (SDWA) of the - operands in the instruction. - 4. VOP2 instructions also have four special opcodes:', - V_MADMK_{F16, F32} (0x24, 0x17), and V_MADAK_{F16, F32}', - (0x25, 0x18), that are always 64b. the only way to', - detect these special cases is to explicitly check,', - the opcodes', - */ - if (iFmt->SRC0 == REG_SRC_LITERAL || (iFmt->SRC0 == REG_SRC_DPP) || - (iFmt->SRC0 == REG_SRC_SWDA) || iFmt->OP == 0x17 || - iFmt->OP == 0x18 || iFmt->OP == 0x24 || iFmt->OP == 0x25) - return true; - - return false; - } - - void - Inst_VOP2::generateDisassembly() - { - std::stringstream dis_stream; - dis_stream << _opcode << " "; - dis_stream << "v" << instData.VDST << ", "; - - if (writesVCC()) - dis_stream << "vcc, "; - - if ((instData.SRC0 == REG_SRC_LITERAL) || - (instData.SRC0 == REG_SRC_DPP) || - (instData.SRC0 == REG_SRC_SWDA)) { - dis_stream << "0x" << std::hex << std::setfill('0') << std::setw(8) - << _srcLiteral << ", "; - } else { - dis_stream << opSelectorToRegSym(instData.SRC0) << ", "; - } - - // VOP2 instructions have four special opcodes:', - // V_MADMK_{F16, F32} (0x24, 0x17), and V_MADAK_{F16, F32}', - // (0x25, 0x18), that are always 64b. 
the only way to', - // detect these special cases is to explicitly check,', - // the opcodes', - if (instData.OP == 0x17 || instData.OP == 0x18 || instData.OP == 0x24 - || instData.OP == 0x25) { - dis_stream << "0x" << std::hex << std::setfill('0') << std::setw(8) - << extData.imm_u32 << ", "; - } - - dis_stream << std::resetiosflags(std::ios_base::basefield) << "v" - << instData.VSRC1; - - if (readsVCC()) - dis_stream << ", vcc"; - - disassembly = dis_stream.str(); - } - - // --- Inst_VOP1 base class methods --- - - Inst_VOP1::Inst_VOP1(InFmt_VOP1 *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - // copy first instruction DWORD - instData = iFmt[0]; - if (hasSecondDword(iFmt)) { - // copy second instruction DWORD into union - extData = ((MachInst)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - varSize = 4 + 4; - if (iFmt->SRC0 == REG_SRC_DPP) { - setFlag(IsDPP); - } else if (iFmt->SRC0 == REG_SRC_SWDA) { - setFlag(IsSDWA); - } - } else { - varSize = 4; - } // if - } // Inst_VOP1 - - Inst_VOP1::~Inst_VOP1() - { - } // ~Inst_VOP1 - - void - Inst_VOP1::initOperandInfo() - { - int opNum = 0; - // Needed because can't take addr of bitfield - int reg = instData.SRC0; - - if (numSrcRegOperands()) { - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(reg), isVectorReg(reg), false); - opNum++; - } - - if (numDstRegOperands()) { - reg = instData.VDST; - dstOps.emplace_back(reg, getOperandSize(opNum), false, - false, true, false); - } - - assert(srcOps.size() == numSrcRegOperands()); - assert(dstOps.size() == numDstRegOperands()); - } - - int - Inst_VOP1::instSize() const - { - return varSize; - } // instSize - - bool - Inst_VOP1::hasSecondDword(InFmt_VOP1 *iFmt) - { - /* - There are several cases where VOP1 instructions have a second dword: - - 1. SRC0 is a literal. - 2. SRC0 is being used to add a data parallel primitive (DPP) - operation to the instruction. - 3. 
SRC0 is being used for sub d-word addressing (SDWA) of the - operands in the instruction. - */ - if ((iFmt->SRC0 == REG_SRC_LITERAL) || (iFmt->SRC0 == REG_SRC_DPP) || - (iFmt->SRC0 == REG_SRC_SWDA)) - return true; - - return false; - } - - void - Inst_VOP1::generateDisassembly() - { - std::stringstream dis_stream; - dis_stream << _opcode << " "; - dis_stream << "v" << instData.VDST << ", "; - - if ((instData.SRC0 == REG_SRC_LITERAL) || - (instData.SRC0 == REG_SRC_DPP) || - (instData.SRC0 == REG_SRC_SWDA)) { - dis_stream << "0x" << std::hex << std::setfill('0') << std::setw(8) - << _srcLiteral; - } else { - dis_stream << opSelectorToRegSym(instData.SRC0); - } - - disassembly = dis_stream.str(); - } - - // --- Inst_VOPC base class methods --- - - Inst_VOPC::Inst_VOPC(InFmt_VOPC *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - setFlag(WritesVCC); - // copy first instruction DWORD - instData = iFmt[0]; - if (hasSecondDword(iFmt)) { - // copy second instruction DWORD into union - extData = ((MachInst)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - varSize = 4 + 4; - if (iFmt->SRC0 == REG_SRC_DPP) { - setFlag(IsDPP); - } else if (iFmt->SRC0 == REG_SRC_SWDA) { - setFlag(IsSDWA); - } - } else { - varSize = 4; - } // if - } // Inst_VOPC - - Inst_VOPC::~Inst_VOPC() - { - } // ~Inst_VOPC - - void - Inst_VOPC::initOperandInfo() - { - int opNum = 0; - - // Needed because can't take addr of bitfield - int reg = instData.SRC0; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(reg), isVectorReg(reg), false); - opNum++; - - reg = instData.VSRC1; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - false, true, false); - opNum++; - - assert(writesVCC()); - dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false, - true, false, false); - - assert(srcOps.size() == numSrcRegOperands()); - assert(dstOps.size() == numDstRegOperands()); - } - - int - Inst_VOPC::instSize() const - { - return varSize; - } // instSize - - bool - 
Inst_VOPC::hasSecondDword(InFmt_VOPC *iFmt) - { - /* - There are several cases where VOPC instructions have a second dword: - - 1. SRC0 is a literal. - 2. SRC0 is being used to add a data parallel primitive (DPP) - operation to the instruction. - 3. SRC0 is being used for sub d-word addressing (SDWA) of the - operands in the instruction. - */ - if ((iFmt->SRC0 == REG_SRC_LITERAL) || (iFmt->SRC0 == REG_SRC_DPP) || - (iFmt->SRC0 == REG_SRC_SWDA)) - return true; - - return false; - } - - void - Inst_VOPC::generateDisassembly() - { - std::stringstream dis_stream; - dis_stream << _opcode << " vcc, "; - - dis_stream << opSelectorToRegSym(instData.SRC0) << ", "; - dis_stream << "v" << instData.VSRC1; - - disassembly = dis_stream.str(); - } - - // --- Inst_VINTRP base class methods --- - - Inst_VINTRP::Inst_VINTRP(InFmt_VINTRP *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - // copy first instruction DWORD - instData = iFmt[0]; - } // Inst_VINTRP - - Inst_VINTRP::~Inst_VINTRP() - { - } // ~Inst_VINTRP - - int - Inst_VINTRP::instSize() const - { - return 4; - } // instSize - - // --- Inst_VOP3 base class methods --- - - Inst_VOP3::Inst_VOP3(InFmt_VOP3 *iFmt, const std::string &opcode, - bool sgpr_dst) - : GCN3GPUStaticInst(opcode), sgprDst(sgpr_dst) - { - // copy first instruction DWORD - instData = iFmt[0]; - // copy second instruction DWORD - extData = ((InFmt_VOP3_1 *)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - } // Inst_VOP3 - - Inst_VOP3::~Inst_VOP3() - { - } // ~Inst_VOP3 - - void - Inst_VOP3::initOperandInfo() - { - // Also takes care of bitfield addr issue - unsigned int srcs[3] = {extData.SRC0, extData.SRC1, extData.SRC2}; - - int opNum = 0; - - int numSrc = numSrcRegOperands() - readsVCC(); - int numDst = numDstRegOperands() - writesVCC(); - - for (opNum = 0; opNum < numSrc; opNum++) { - srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true, - isScalarReg(srcs[opNum]), - isVectorReg(srcs[opNum]), false); - } - - if 
(readsVCC()) { - srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true, - true, false, false); - opNum++; - } - - if (numDst) { - // Needed because can't take addr of bitfield - int reg = instData.VDST; - dstOps.emplace_back(reg, getOperandSize(opNum), false, - sgprDst, !sgprDst, false); - opNum++; - } - - if (writesVCC()) { - dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false, - true, false, false); - } - - assert(srcOps.size() == numSrcRegOperands()); - assert(dstOps.size() == numDstRegOperands()); - } - - int - Inst_VOP3::instSize() const - { - return 8; - } // instSize - - void - Inst_VOP3::generateDisassembly() - { - std::stringstream dis_stream; - dis_stream << _opcode << " "; - int num_regs = 0; - - if (getOperandSize(getNumOperands() - 1) > 4) { - num_regs = getOperandSize(getNumOperands() - 1) / 4; - if (sgprDst) - dis_stream << "s["; - else - dis_stream << "v["; - dis_stream << instData.VDST << ":" << instData.VDST + - num_regs - 1 << "], "; - } else { - if (sgprDst) - dis_stream << "s"; - else - dis_stream << "v"; - dis_stream << instData.VDST << ", "; - } - - num_regs = getOperandSize(0) / 4; - - if (extData.NEG & 0x1) { - dis_stream << "-" << opSelectorToRegSym(extData.SRC0, num_regs); - } else { - dis_stream << opSelectorToRegSym(extData.SRC0, num_regs); - } - - if (numSrcRegOperands() > 1) { - num_regs = getOperandSize(1) / 4; - - if (extData.NEG & 0x2) { - dis_stream << ", -" - << opSelectorToRegSym(extData.SRC1, num_regs); - } else { - dis_stream << ", " - << opSelectorToRegSym(extData.SRC1, num_regs); - } - } - - if (numSrcRegOperands() > 2) { - num_regs = getOperandSize(2) / 4; - - if (extData.NEG & 0x4) { - dis_stream << ", -" - << opSelectorToRegSym(extData.SRC2, num_regs); - } else { - dis_stream << ", " - << opSelectorToRegSym(extData.SRC2, num_regs); - } - } - - disassembly = dis_stream.str(); - } - - // --- Inst_VOP3_SDST_ENC base class methods --- - - Inst_VOP3_SDST_ENC::Inst_VOP3_SDST_ENC(InFmt_VOP3_SDST_ENC *iFmt, - const 
std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - // copy first instruction DWORD - instData = iFmt[0]; - // copy second instruction DWORD - extData = ((InFmt_VOP3_1 *)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - } // Inst_VOP3_SDST_ENC - - Inst_VOP3_SDST_ENC::~Inst_VOP3_SDST_ENC() - { - } // ~Inst_VOP3_SDST_ENC - - void - Inst_VOP3_SDST_ENC::initOperandInfo() - { - // Also takes care of bitfield addr issue - unsigned int srcs[3] = {extData.SRC0, extData.SRC1, extData.SRC2}; - - int opNum = 0; - - int numSrc = numSrcRegOperands() - readsVCC(); - int numDst = numDstRegOperands() - writesVCC(); - - for (opNum = 0; opNum < numSrc; opNum++) { - srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true, - isScalarReg(srcs[opNum]), - isVectorReg(srcs[opNum]), false); - } - - if (readsVCC()) { - srcOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), true, - true, false, false); - opNum++; - } - - if (numDst) { - // Needed because can't take addr of bitfield - int reg = instData.VDST; - dstOps.emplace_back(reg, getOperandSize(opNum), false, - false, true, false); - opNum++; - } - - if (writesVCC()) { - dstOps.emplace_back(REG_VCC_LO, getOperandSize(opNum), false, - true, false, false); - } - - assert(srcOps.size() == numSrcRegOperands()); - assert(dstOps.size() == numDstRegOperands()); - } - - int - Inst_VOP3_SDST_ENC::instSize() const - { - return 8; - } // instSize - - void - Inst_VOP3_SDST_ENC::generateDisassembly() - { - std::stringstream dis_stream; - dis_stream << _opcode << " "; - - dis_stream << "v" << instData.VDST << ", "; - - if (numDstRegOperands() == 2) { - if (getOperandSize(getNumOperands() - 1) > 4) { - int num_regs = getOperandSize(getNumOperands() - 1) / 4; - dis_stream << opSelectorToRegSym(instData.SDST, num_regs) - << ", "; - } else { - dis_stream << opSelectorToRegSym(instData.SDST) << ", "; - } - } - - if (extData.NEG & 0x1) { - dis_stream << "-" << opSelectorToRegSym(extData.SRC0) << ", "; - } else { - dis_stream << 
opSelectorToRegSym(extData.SRC0) << ", "; - } - - if (extData.NEG & 0x2) { - dis_stream << "-" << opSelectorToRegSym(extData.SRC1); - } else { - dis_stream << opSelectorToRegSym(extData.SRC1); - } - - if (numSrcRegOperands() == 3) { - if (extData.NEG & 0x4) { - dis_stream << ", -" << opSelectorToRegSym(extData.SRC2); - } else { - dis_stream << ", " << opSelectorToRegSym(extData.SRC2); - } - } - - if (readsVCC()) - dis_stream << ", vcc"; - - disassembly = dis_stream.str(); - } - - // --- Inst_DS base class methods --- - - Inst_DS::Inst_DS(InFmt_DS *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - setFlag(GroupSegment); - - // copy first instruction DWORD - instData = iFmt[0]; - // copy second instruction DWORD - extData = ((InFmt_DS_1 *)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - } // Inst_DS - - Inst_DS::~Inst_DS() - { - } // ~Inst_DS - - void - Inst_DS::initOperandInfo() - { - unsigned int srcs[3] = {extData.ADDR, extData.DATA0, extData.DATA1}; - - int opIdx = 0; - - for (opIdx = 0; opIdx < numSrcRegOperands(); opIdx++){ - srcOps.emplace_back(srcs[opIdx], getOperandSize(opIdx), true, - false, true, false); - } - - if (numDstRegOperands()) { - // Needed because can't take addr of bitfield - int reg = extData.VDST; - dstOps.emplace_back(reg, getOperandSize(opIdx), false, - false, true, false); - } - - assert(srcOps.size() == numSrcRegOperands()); - assert(dstOps.size() == numDstRegOperands()); - } - - int - Inst_DS::instSize() const - { - return 8; - } // instSize - - void - Inst_DS::generateDisassembly() - { - std::stringstream dis_stream; - dis_stream << _opcode << " "; - - if (numDstRegOperands()) - dis_stream << "v" << extData.VDST << ", "; - - dis_stream << "v" << extData.ADDR; - - if (numSrcRegOperands() > 1) - dis_stream << ", v" << extData.DATA0; - - if (numSrcRegOperands() > 2) - dis_stream << ", v" << extData.DATA1; - - uint16_t offset = 0; - - if (instData.OFFSET1) { - offset += instData.OFFSET1; - offset <<= 8; - } - - if 
(instData.OFFSET0) - offset += instData.OFFSET0; - - if (offset) - dis_stream << " offset:" << offset; - - disassembly = dis_stream.str(); - } - - // --- Inst_MUBUF base class methods --- - - Inst_MUBUF::Inst_MUBUF(InFmt_MUBUF *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - // copy first instruction DWORD - instData = iFmt[0]; - // copy second instruction DWORD - extData = ((InFmt_MUBUF_1 *)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - - if (instData.GLC) - setFlag(GloballyCoherent); - - if (instData.SLC) - setFlag(SystemCoherent); - } // Inst_MUBUF - - Inst_MUBUF::~Inst_MUBUF() - { - } // ~Inst_MUBUF - - void - Inst_MUBUF::initOperandInfo() - { - // Currently there are three formats: - // 0 src + 0 dst - // 3 src + 1 dst - // 4 src + 0 dst - int opNum = 0; - - // Needed because can't take addr of bitfield; - int reg = 0; - - if (numSrcRegOperands()) { - if (numSrcRegOperands() == getNumOperands()) { - reg = extData.VDATA; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - false, true, false); - opNum++; - } - - reg = extData.VADDR; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - false, true, false); - opNum++; - - reg = extData.SRSRC; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(reg), false, false); - opNum++; - - reg = extData.SOFFSET; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(reg), false, false); - opNum++; - } - - // extData.VDATA moves in the reg list depending on the instruction - if (numDstRegOperands()) { - reg = extData.VDATA; - dstOps.emplace_back(reg, getOperandSize(opNum), false, - false, true, false); - } - - assert(srcOps.size() == numSrcRegOperands()); - assert(dstOps.size() == numDstRegOperands()); - } - - int - Inst_MUBUF::instSize() const - { - return 8; - } // instSize - - void - Inst_MUBUF::generateDisassembly() - { - // SRSRC is always in units of 4 SGPRs - int srsrc_val = extData.SRSRC * 4; - std::stringstream dis_stream; - dis_stream << 
_opcode << " "; - dis_stream << "v" << extData.VDATA << ", v" << extData.VADDR << ", "; - dis_stream << "s[" << srsrc_val << ":" - << srsrc_val + 3 << "], "; - dis_stream << "s" << extData.SOFFSET; - - if (instData.OFFSET) - dis_stream << ", offset:" << instData.OFFSET; - - disassembly = dis_stream.str(); - } - - // --- Inst_MTBUF base class methods --- - - Inst_MTBUF::Inst_MTBUF(InFmt_MTBUF *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - // copy first instruction DWORD - instData = iFmt[0]; - // copy second instruction DWORD - extData = ((InFmt_MTBUF_1 *)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - - if (instData.GLC) - setFlag(GloballyCoherent); - - if (extData.SLC) - setFlag(SystemCoherent); - } // Inst_MTBUF - - Inst_MTBUF::~Inst_MTBUF() - { - } // ~Inst_MTBUF - - void - Inst_MTBUF::initOperandInfo() - { - // Currently there are two formats: - // 3 src + 1 dst - // 4 src + 0 dst - int opNum = 0; - - // Needed because can't take addr of bitfield - int reg = 0; - - if (numSrcRegOperands() == getNumOperands()) { - reg = extData.VDATA; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - false, true, false); - opNum++; - } - - reg = extData.VADDR; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - false, true, false); - opNum++; - - reg = extData.SRSRC; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(reg), false, false); - opNum++; - - reg = extData.SOFFSET; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(reg), false, false); - opNum++; - - // extData.VDATA moves in the reg list depending on the instruction - if (numDstRegOperands()) { - reg = extData.VDATA; - dstOps.emplace_back(reg, getOperandSize(opNum), false, - false, true, false); - } - - assert(srcOps.size() == numSrcRegOperands()); - assert(dstOps.size() == numDstRegOperands()); - } - - int - Inst_MTBUF::instSize() const - { - return 8; - } // instSize - - // --- Inst_MIMG base class methods --- - - 
Inst_MIMG::Inst_MIMG(InFmt_MIMG *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - // copy first instruction DWORD - instData = iFmt[0]; - // copy second instruction DWORD - extData = ((InFmt_MIMG_1 *)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - - if (instData.GLC) - setFlag(GloballyCoherent); - - if (instData.SLC) - setFlag(SystemCoherent); - } // Inst_MIMG - - Inst_MIMG::~Inst_MIMG() - { - } // ~Inst_MIMG - - void - Inst_MIMG::initOperandInfo() - { - // Three formats: - // 1 dst + 2 src : s,s,d - // 0 dst + 3 src : s,s,s - // 1 dst + 3 src : s,s,s,d - int opNum = 0; - - // Needed because can't take addr of bitfield - int reg = 0; - - if (numSrcRegOperands() == getNumOperands()) { - reg = extData.VDATA; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - false, true, false); - opNum++; - } - - reg = extData.VADDR; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - false, true, false); - opNum++; - - reg = extData.SRSRC; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(reg), false, false); - opNum++; - - if (getNumOperands() == 4) { - reg = extData.SSAMP; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - isScalarReg(reg), false, false); - opNum++; - } - - // extData.VDATA moves in the reg list depending on the instruction - if (numDstRegOperands()) { - reg = extData.VDATA; - dstOps.emplace_back(reg, getOperandSize(opNum), false, - false, true, false); - } - - assert(srcOps.size() == numSrcRegOperands()); - assert(dstOps.size() == numDstRegOperands()); - } - - int - Inst_MIMG::instSize() const - { - return 8; - } // instSize - - // --- Inst_EXP base class methods --- - - Inst_EXP::Inst_EXP(InFmt_EXP *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - // copy first instruction DWORD - instData = iFmt[0]; - // copy second instruction DWORD - extData = ((InFmt_EXP_1 *)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - } // Inst_EXP - - Inst_EXP::~Inst_EXP() - { - } // 
~Inst_EXP - - void - Inst_EXP::initOperandInfo() - { - // Only 1 instruction, 1 format: 1 dst + 4 src - int opNum = 0; - - // Avoids taking addr of bitfield - unsigned int srcs[4] = {extData.VSRC0, extData.VSRC1, - extData.VSRC2, extData.VSRC3}; - - for (opNum = 0; opNum < 4; opNum++) { - srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true, - false, true, false); - } - - //TODO: Add the dst operand, don't know what it is right now - } - - int - Inst_EXP::instSize() const - { - return 8; - } // instSize - - // --- Inst_FLAT base class methods --- - - Inst_FLAT::Inst_FLAT(InFmt_FLAT *iFmt, const std::string &opcode) - : GCN3GPUStaticInst(opcode) - { - setFlag(Flat); - // copy first instruction DWORD - instData = iFmt[0]; - // copy second instruction DWORD - extData = ((InFmt_FLAT_1 *)iFmt)[1]; - _srcLiteral = *reinterpret_cast(&iFmt[1]); - - if (instData.GLC) - setFlag(GloballyCoherent); - - if (instData.SLC) - setFlag(SystemCoherent); - } // Inst_FLAT - - Inst_FLAT::~Inst_FLAT() - { - } // ~Inst_FLAT - - void - Inst_FLAT::initOperandInfo() - { - //3 formats: - // 1 dst + 1 src (load) - // 0 dst + 2 src (store) - // 1 dst + 2 src (atomic) - int opNum = 0; - - // Needed because can't take addr of bitfield - int reg = 0; - - if (getNumOperands() > 2) - assert(isAtomic()); - - reg = extData.ADDR; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - false, true, false); - opNum++; - - if (numSrcRegOperands() == 2) { - reg = extData.DATA; - srcOps.emplace_back(reg, getOperandSize(opNum), true, - false, true, false); - opNum++; - } - - if (numDstRegOperands()) { - reg = extData.VDST; - dstOps.emplace_back(reg, getOperandSize(opNum), false, - false, true, false); - } - - assert(srcOps.size() == numSrcRegOperands()); - assert(dstOps.size() == numDstRegOperands()); - } - - int - Inst_FLAT::instSize() const - { - return 8; - } // instSize - - void - Inst_FLAT::generateDisassembly() - { - std::stringstream dis_stream; - dis_stream << _opcode << " "; - - if 
(isLoad()) - dis_stream << "v" << extData.VDST << ", "; - - dis_stream << "v[" << extData.ADDR << ":" << extData.ADDR + 1 << "]"; - - if (isStore()) - dis_stream << ", v" << extData.DATA; - - disassembly = dis_stream.str(); - } -} // namespace Gcn3ISA -} // namespace gem5 diff --git a/src/arch/amdgpu/gcn3/insts/op_encodings.hh b/src/arch/amdgpu/gcn3/insts/op_encodings.hh deleted file mode 100644 index 880ccc4503..0000000000 --- a/src/arch/amdgpu/gcn3/insts/op_encodings.hh +++ /dev/null @@ -1,925 +0,0 @@ -/* - * Copyright (c) 2016-2021 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__ -#define __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__ - -#include "arch/amdgpu/gcn3/gpu_decoder.hh" -#include "arch/amdgpu/gcn3/gpu_mem_helpers.hh" -#include "arch/amdgpu/gcn3/insts/gpu_static_inst.hh" -#include "arch/amdgpu/gcn3/operand.hh" -#include "debug/GCN3.hh" -#include "debug/GPUExec.hh" -#include "mem/ruby/system/RubySystem.hh" - -namespace gem5 -{ - -namespace Gcn3ISA -{ - struct BufferRsrcDescriptor - { - uint64_t baseAddr : 48; - uint32_t stride : 14; - uint32_t cacheSwizzle : 1; - uint32_t swizzleEn : 1; - uint32_t numRecords : 32; - uint32_t dstSelX : 3; - uint32_t dstSelY : 3; - uint32_t dstSelZ : 3; - uint32_t dstSelW : 3; - uint32_t numFmt : 3; - uint32_t dataFmt : 4; - uint32_t elemSize : 2; - uint32_t idxStride : 2; - uint32_t addTidEn : 1; - uint32_t atc : 1; - uint32_t hashEn : 1; - uint32_t heap : 1; - uint32_t mType : 3; - uint32_t type : 2; - }; - - // --- purely virtual instruction classes --- - - class Inst_SOP2 : public GCN3GPUStaticInst - { - public: - Inst_SOP2(InFmt_SOP2*, const std::string &opcode); - - int instSize() const override; - void generateDisassembly() override; - - void initOperandInfo() override; - - protected: - // first instruction DWORD - InFmt_SOP2 instData; - // possible second DWORD - InstFormat extData; - uint32_t varSize; - - private: - bool hasSecondDword(InFmt_SOP2 *); - }; // Inst_SOP2 - - class Inst_SOPK : public GCN3GPUStaticInst - { - 
public: - Inst_SOPK(InFmt_SOPK*, const std::string &opcode); - ~Inst_SOPK(); - - int instSize() const override; - void generateDisassembly() override; - - void initOperandInfo() override; - - protected: - // first instruction DWORD - InFmt_SOPK instData; - // possible second DWORD - InstFormat extData; - uint32_t varSize; - - private: - bool hasSecondDword(InFmt_SOPK *); - }; // Inst_SOPK - - class Inst_SOP1 : public GCN3GPUStaticInst - { - public: - Inst_SOP1(InFmt_SOP1*, const std::string &opcode); - ~Inst_SOP1(); - - int instSize() const override; - void generateDisassembly() override; - - void initOperandInfo() override; - - protected: - // first instruction DWORD - InFmt_SOP1 instData; - // possible second DWORD - InstFormat extData; - uint32_t varSize; - - private: - bool hasSecondDword(InFmt_SOP1 *); - }; // Inst_SOP1 - - class Inst_SOPC : public GCN3GPUStaticInst - { - public: - Inst_SOPC(InFmt_SOPC*, const std::string &opcode); - ~Inst_SOPC(); - - int instSize() const override; - void generateDisassembly() override; - - void initOperandInfo() override; - - protected: - // first instruction DWORD - InFmt_SOPC instData; - // possible second DWORD - InstFormat extData; - uint32_t varSize; - - private: - bool hasSecondDword(InFmt_SOPC *); - }; // Inst_SOPC - - class Inst_SOPP : public GCN3GPUStaticInst - { - public: - Inst_SOPP(InFmt_SOPP*, const std::string &opcode); - ~Inst_SOPP(); - - int instSize() const override; - void generateDisassembly() override; - - void initOperandInfo() override; - - protected: - // first instruction DWORD - InFmt_SOPP instData; - }; // Inst_SOPP - - class Inst_SMEM : public GCN3GPUStaticInst - { - public: - Inst_SMEM(InFmt_SMEM*, const std::string &opcode); - ~Inst_SMEM(); - - int instSize() const override; - void generateDisassembly() override; - - void initOperandInfo() override; - - protected: - /** - * initiate a memory read access for N dwords - */ - template - void - initMemRead(GPUDynInstPtr gpuDynInst) - { - 
initMemReqScalarHelper(gpuDynInst, - MemCmd::ReadReq); - } - - /** - * initiate a memory write access for N dwords - */ - template - void - initMemWrite(GPUDynInstPtr gpuDynInst) - { - initMemReqScalarHelper(gpuDynInst, - MemCmd::WriteReq); - } - - /** - * For normal s_load_dword/s_store_dword instruction addresses. - */ - void - calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr, - ScalarRegU32 offset) - { - Addr vaddr = ((addr.rawData() + offset) & ~0x3); - gpu_dyn_inst->scalarAddr = vaddr; - } - - /** - * For s_buffer_load_dword/s_buffer_store_dword instruction addresses. - * The s_buffer instructions use the same buffer resource descriptor - * as the MUBUF instructions. - */ - void - calcAddr(GPUDynInstPtr gpu_dyn_inst, - ConstScalarOperandU128 &s_rsrc_desc, ScalarRegU32 offset) - { - BufferRsrcDescriptor rsrc_desc; - ScalarRegU32 clamped_offset(offset); - std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(), - sizeof(BufferRsrcDescriptor)); - - /** - * The address is clamped if: - * Stride is zero: clamp if offset >= num_records - * Stride is non-zero: clamp if offset > (stride * num_records) - */ - if (!rsrc_desc.stride && offset >= rsrc_desc.numRecords) { - clamped_offset = rsrc_desc.numRecords; - } else if (rsrc_desc.stride && offset - > (rsrc_desc.stride * rsrc_desc.numRecords)) { - clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords); - } - - Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3); - gpu_dyn_inst->scalarAddr = vaddr; - } - - // first instruction DWORD - InFmt_SMEM instData; - // second instruction DWORD - InFmt_SMEM_1 extData; - }; // Inst_SMEM - - class Inst_VOP2 : public GCN3GPUStaticInst - { - public: - Inst_VOP2(InFmt_VOP2*, const std::string &opcode); - ~Inst_VOP2(); - - int instSize() const override; - void generateDisassembly() override; - - void initOperandInfo() override; - - protected: - // first instruction DWORD - InFmt_VOP2 instData; - // possible second DWORD - InstFormat extData; - uint32_t 
varSize; - - private: - bool hasSecondDword(InFmt_VOP2 *); - }; // Inst_VOP2 - - class Inst_VOP1 : public GCN3GPUStaticInst - { - public: - Inst_VOP1(InFmt_VOP1*, const std::string &opcode); - ~Inst_VOP1(); - - int instSize() const override; - void generateDisassembly() override; - - void initOperandInfo() override; - - protected: - // first instruction DWORD - InFmt_VOP1 instData; - // possible second DWORD - InstFormat extData; - uint32_t varSize; - - private: - bool hasSecondDword(InFmt_VOP1 *); - }; // Inst_VOP1 - - class Inst_VOPC : public GCN3GPUStaticInst - { - public: - Inst_VOPC(InFmt_VOPC*, const std::string &opcode); - ~Inst_VOPC(); - - int instSize() const override; - void generateDisassembly() override; - - void initOperandInfo() override; - - protected: - // first instruction DWORD - InFmt_VOPC instData; - // possible second DWORD - InstFormat extData; - uint32_t varSize; - - private: - bool hasSecondDword(InFmt_VOPC *); - }; // Inst_VOPC - - class Inst_VINTRP : public GCN3GPUStaticInst - { - public: - Inst_VINTRP(InFmt_VINTRP*, const std::string &opcode); - ~Inst_VINTRP(); - - int instSize() const override; - - protected: - // first instruction DWORD - InFmt_VINTRP instData; - }; // Inst_VINTRP - - class Inst_VOP3 : public GCN3GPUStaticInst - { - public: - Inst_VOP3(InFmt_VOP3*, const std::string &opcode, bool sgpr_dst); - ~Inst_VOP3(); - - int instSize() const override; - void generateDisassembly() override; - - void initOperandInfo() override; - - protected: - // first instruction DWORD - InFmt_VOP3 instData; - // second instruction DWORD - InFmt_VOP3_1 extData; - - private: - bool hasSecondDword(InFmt_VOP3 *); - /** - * the v_cmp and readlane instructions in the VOP3 - * encoding are unique because they are the only - * instructions that use the VDST field to specify - * a scalar register destination. for VOP3::V_CMP insts - * VDST specifies the arbitrary SGPR pair used to write - * VCC. 
for V_READLANE VDST specifies the SGPR to return - * the value of the selected lane in the source VGPR - * from which we are reading. - */ - const bool sgprDst; - }; // Inst_VOP3 - - class Inst_VOP3_SDST_ENC : public GCN3GPUStaticInst - { - public: - Inst_VOP3_SDST_ENC(InFmt_VOP3_SDST_ENC*, const std::string &opcode); - ~Inst_VOP3_SDST_ENC(); - - int instSize() const override; - void generateDisassembly() override; - - void initOperandInfo() override; - - protected: - // first instruction DWORD - InFmt_VOP3_SDST_ENC instData; - // second instruction DWORD - InFmt_VOP3_1 extData; - - private: - bool hasSecondDword(InFmt_VOP3_SDST_ENC *); - }; // Inst_VOP3_SDST_ENC - - class Inst_DS : public GCN3GPUStaticInst - { - public: - Inst_DS(InFmt_DS*, const std::string &opcode); - ~Inst_DS(); - - int instSize() const override; - void generateDisassembly() override; - - void initOperandInfo() override; - - protected: - template - void - initMemRead(GPUDynInstPtr gpuDynInst, Addr offset) - { - Wavefront *wf = gpuDynInst->wavefront(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - Addr vaddr = gpuDynInst->addr[lane] + offset; - - (reinterpret_cast(gpuDynInst->d_data))[lane] - = wf->ldsChunk->read(vaddr); - } - } - } - - template - void - initMemRead(GPUDynInstPtr gpuDynInst, Addr offset) - { - Wavefront *wf = gpuDynInst->wavefront(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - Addr vaddr = gpuDynInst->addr[lane] + offset; - for (int i = 0; i < N; ++i) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * N + i] - = wf->ldsChunk->read( - vaddr + i*sizeof(VecElemU32)); - } - } - } - } - - template - void - initDualMemRead(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1) - { - Wavefront *wf = gpuDynInst->wavefront(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - Addr vaddr0 = gpuDynInst->addr[lane] + offset0; - Addr 
vaddr1 = gpuDynInst->addr[lane] + offset1; - - (reinterpret_cast(gpuDynInst->d_data))[lane * 2] - = wf->ldsChunk->read(vaddr0); - (reinterpret_cast(gpuDynInst->d_data))[lane * 2 + 1] - = wf->ldsChunk->read(vaddr1); - } - } - } - - template - void - initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset) - { - Wavefront *wf = gpuDynInst->wavefront(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - Addr vaddr = gpuDynInst->addr[lane] + offset; - wf->ldsChunk->write(vaddr, - (reinterpret_cast(gpuDynInst->d_data))[lane]); - } - } - } - - template - void - initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset) - { - Wavefront *wf = gpuDynInst->wavefront(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - Addr vaddr = gpuDynInst->addr[lane] + offset; - for (int i = 0; i < N; ++i) { - wf->ldsChunk->write( - vaddr + i*sizeof(VecElemU32), - (reinterpret_cast( - gpuDynInst->d_data))[lane * N + i]); - } - } - } - } - - template - void - initDualMemWrite(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1) - { - Wavefront *wf = gpuDynInst->wavefront(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - Addr vaddr0 = gpuDynInst->addr[lane] + offset0; - Addr vaddr1 = gpuDynInst->addr[lane] + offset1; - wf->ldsChunk->write(vaddr0, (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]); - wf->ldsChunk->write(vaddr1, (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]); - } - } - } - - void - calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr) - { - Wavefront *wf = gpuDynInst->wavefront(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - gpuDynInst->addr.at(lane) = (Addr)addr[lane]; - } - } - } - - // first instruction DWORD - InFmt_DS instData; - // second instruction DWORD - InFmt_DS_1 extData; - }; // Inst_DS - - class Inst_MUBUF : public GCN3GPUStaticInst - { - public: - 
Inst_MUBUF(InFmt_MUBUF*, const std::string &opcode); - ~Inst_MUBUF(); - - int instSize() const override; - void generateDisassembly() override; - - void initOperandInfo() override; - - protected: - template - void - initMemRead(GPUDynInstPtr gpuDynInst) - { - // temporarily modify exec_mask to supress memory accesses to oob - // regions. Only issue memory requests for lanes that have their - // exec_mask set and are not out of bounds. - VectorMask old_exec_mask = gpuDynInst->exec_mask; - gpuDynInst->exec_mask &= ~oobMask; - initMemReqHelper(gpuDynInst, MemCmd::ReadReq); - gpuDynInst->exec_mask = old_exec_mask; - } - - - template - void - initMemRead(GPUDynInstPtr gpuDynInst) - { - // temporarily modify exec_mask to supress memory accesses to oob - // regions. Only issue memory requests for lanes that have their - // exec_mask set and are not out of bounds. - VectorMask old_exec_mask = gpuDynInst->exec_mask; - gpuDynInst->exec_mask &= ~oobMask; - initMemReqHelper(gpuDynInst, MemCmd::ReadReq); - gpuDynInst->exec_mask = old_exec_mask; - } - - template - void - initMemWrite(GPUDynInstPtr gpuDynInst) - { - // temporarily modify exec_mask to supress memory accesses to oob - // regions. Only issue memory requests for lanes that have their - // exec_mask set and are not out of bounds. - VectorMask old_exec_mask = gpuDynInst->exec_mask; - gpuDynInst->exec_mask &= ~oobMask; - initMemReqHelper(gpuDynInst, MemCmd::WriteReq); - gpuDynInst->exec_mask = old_exec_mask; - } - - template - void - initMemWrite(GPUDynInstPtr gpuDynInst) - { - // temporarily modify exec_mask to supress memory accesses to oob - // regions. Only issue memory requests for lanes that have their - // exec_mask set and are not out of bounds. 
- VectorMask old_exec_mask = gpuDynInst->exec_mask; - gpuDynInst->exec_mask &= ~oobMask; - initMemReqHelper(gpuDynInst, MemCmd::WriteReq); - gpuDynInst->exec_mask = old_exec_mask; - } - - void - injectGlobalMemFence(GPUDynInstPtr gpuDynInst) - { - // create request and set flags - gpuDynInst->resetEntireStatusVector(); - gpuDynInst->setStatusVector(0, 1); - RequestPtr req = std::make_shared(0, 0, 0, - gpuDynInst->computeUnit()-> - requestorId(), 0, - gpuDynInst->wfDynId); - gpuDynInst->setRequestFlags(req); - gpuDynInst->computeUnit()-> - injectGlobalMemFence(gpuDynInst, false, req); - } - - /** - * MUBUF insructions calculate their addresses as follows: - * - * index = (IDXEN ? vgpr_idx : 0) + (const_add_tid_en ? TID : 0) - * offset = (OFFEN ? vgpr_off : 0) + inst_off - * - * / ====================== LINEAR ADDRESSING ====================== / - * VADDR = base + sgpr_off + offset + stride * index - * - * / ===================== SWIZZLED ADDRESSING ===================== / - * index_msb = index / const_index_stride - * index_lsb = index % const_index_stride - * offset_msb = offset / const_element_size - * offset_lsb = offset % const_element_size - * buffer_offset = ((index_msb * stride + offset_msb * - * const_element_size) * const_index_stride + - * index_lsb * const_element_size + offset_lsb) - * - * VADDR = base + sgpr_off + buffer_offset - */ - template - void - calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx, - SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset) - { - Addr vaddr = 0; - Addr base_addr = 0; - Addr stride = 0; - Addr buf_idx = 0; - Addr buf_off = 0; - Addr buffer_offset = 0; - BufferRsrcDescriptor rsrc_desc; - - std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(), - sizeof(BufferRsrcDescriptor)); - - base_addr = rsrc_desc.baseAddr; - - stride = rsrc_desc.addTidEn ? 
((rsrc_desc.dataFmt << 14) - + rsrc_desc.stride) : rsrc_desc.stride; - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vaddr = base_addr + s_offset.rawData(); - /** - * first we calculate the buffer's index and offset. - * these will be used for either linear or swizzled - * buffers. - */ - buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0); - - buf_off = v_off[lane] + inst_offset; - - if (rsrc_desc.swizzleEn) { - Addr idx_stride = 8 << rsrc_desc.idxStride; - Addr elem_size = 2 << rsrc_desc.elemSize; - Addr idx_msb = buf_idx / idx_stride; - Addr idx_lsb = buf_idx % idx_stride; - Addr off_msb = buf_off / elem_size; - Addr off_lsb = buf_off % elem_size; - DPRINTF(GCN3, "mubuf swizzled lane %d: " - "idx_stride = %llx, elem_size = %llx, " - "idx_msb = %llx, idx_lsb = %llx, " - "off_msb = %llx, off_lsb = %llx\n", - lane, idx_stride, elem_size, idx_msb, idx_lsb, - off_msb, off_lsb); - - buffer_offset =(idx_msb * stride + off_msb * elem_size) - * idx_stride + idx_lsb * elem_size + off_lsb; - } else { - buffer_offset = buf_off + stride * buf_idx; - } - - - /** - * Range check behavior causes out of range accesses to - * to be treated differently. Out of range accesses return - * 0 for loads and are ignored for stores. For - * non-formatted accesses, this is done on a per-lane - * basis. 
- */ - if (rsrc_desc.stride == 0 || !rsrc_desc.swizzleEn) { - if (buffer_offset >= - rsrc_desc.numRecords - s_offset.rawData()) { - DPRINTF(GCN3, "mubuf out-of-bounds condition 1: " - "lane = %d, buffer_offset = %llx, " - "const_stride = %llx, " - "const_num_records = %llx\n", - lane, buf_off + stride * buf_idx, - rsrc_desc.stride, rsrc_desc.numRecords); - oobMask.set(lane); - continue; - } - } - - if (rsrc_desc.stride != 0 && rsrc_desc.swizzleEn) { - if (buf_idx >= rsrc_desc.numRecords || - buf_off >= stride) { - DPRINTF(GCN3, "mubuf out-of-bounds condition 2: " - "lane = %d, offset = %llx, " - "index = %llx, " - "const_num_records = %llx\n", - lane, buf_off, buf_idx, - rsrc_desc.numRecords); - oobMask.set(lane); - continue; - } - } - - vaddr += buffer_offset; - - DPRINTF(GCN3, "Calculating mubuf address for lane %d: " - "vaddr = %llx, base_addr = %llx, " - "stride = %llx, buf_idx = %llx, buf_off = %llx\n", - lane, vaddr, base_addr, stride, - buf_idx, buf_off); - gpuDynInst->addr.at(lane) = vaddr; - } - } - } - - // first instruction DWORD - InFmt_MUBUF instData; - // second instruction DWORD - InFmt_MUBUF_1 extData; - // Mask of lanes with out-of-bounds accesses. Needs to be tracked - // seperately from the exec_mask so that we remember to write zero - // to the registers associated with out of bounds lanes. 
- VectorMask oobMask; - }; // Inst_MUBUF - - class Inst_MTBUF : public GCN3GPUStaticInst - { - public: - Inst_MTBUF(InFmt_MTBUF*, const std::string &opcode); - ~Inst_MTBUF(); - - int instSize() const override; - void initOperandInfo() override; - - protected: - // first instruction DWORD - InFmt_MTBUF instData; - // second instruction DWORD - InFmt_MTBUF_1 extData; - - private: - bool hasSecondDword(InFmt_MTBUF *); - }; // Inst_MTBUF - - class Inst_MIMG : public GCN3GPUStaticInst - { - public: - Inst_MIMG(InFmt_MIMG*, const std::string &opcode); - ~Inst_MIMG(); - - int instSize() const override; - void initOperandInfo() override; - - protected: - // first instruction DWORD - InFmt_MIMG instData; - // second instruction DWORD - InFmt_MIMG_1 extData; - }; // Inst_MIMG - - class Inst_EXP : public GCN3GPUStaticInst - { - public: - Inst_EXP(InFmt_EXP*, const std::string &opcode); - ~Inst_EXP(); - - int instSize() const override; - void initOperandInfo() override; - - protected: - // first instruction DWORD - InFmt_EXP instData; - // second instruction DWORD - InFmt_EXP_1 extData; - }; // Inst_EXP - - class Inst_FLAT : public GCN3GPUStaticInst - { - public: - Inst_FLAT(InFmt_FLAT*, const std::string &opcode); - ~Inst_FLAT(); - - int instSize() const override; - void generateDisassembly() override; - - void initOperandInfo() override; - - protected: - template - void - initMemRead(GPUDynInstPtr gpuDynInst) - { - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - initMemReqHelper(gpuDynInst, MemCmd::ReadReq); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - Wavefront *wf = gpuDynInst->wavefront(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - Addr vaddr = gpuDynInst->addr[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane] - = wf->ldsChunk->read(vaddr); - } - } - } - } - - template - void - initMemRead(GPUDynInstPtr gpuDynInst) - { - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - 
initMemReqHelper(gpuDynInst, MemCmd::ReadReq); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - Wavefront *wf = gpuDynInst->wavefront(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - Addr vaddr = gpuDynInst->addr[lane]; - for (int i = 0; i < N; ++i) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * N + i] - = wf->ldsChunk->read( - vaddr + i*sizeof(VecElemU32)); - } - } - } - } - } - - template - void - initMemWrite(GPUDynInstPtr gpuDynInst) - { - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - initMemReqHelper(gpuDynInst, MemCmd::WriteReq); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - Wavefront *wf = gpuDynInst->wavefront(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - Addr vaddr = gpuDynInst->addr[lane]; - wf->ldsChunk->write(vaddr, - (reinterpret_cast(gpuDynInst->d_data))[lane]); - } - } - } - } - - template - void - initMemWrite(GPUDynInstPtr gpuDynInst) - { - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - initMemReqHelper(gpuDynInst, MemCmd::WriteReq); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - Wavefront *wf = gpuDynInst->wavefront(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - Addr vaddr = gpuDynInst->addr[lane]; - for (int i = 0; i < N; ++i) { - wf->ldsChunk->write( - vaddr + i*sizeof(VecElemU32), - (reinterpret_cast( - gpuDynInst->d_data))[lane * N + i]); - } - } - } - } - } - - template - void - initAtomicAccess(GPUDynInstPtr gpuDynInst) - { - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - initMemReqHelper(gpuDynInst, MemCmd::SwapReq, true); - } else if (gpuDynInst->executedAs() == enums::SC_GROUP) { - Wavefront *wf = gpuDynInst->wavefront(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - Addr vaddr = gpuDynInst->addr[lane]; - auto amo_op = - gpuDynInst->makeAtomicOpFunctor( - 
&(reinterpret_cast( - gpuDynInst->a_data))[lane], - &(reinterpret_cast( - gpuDynInst->x_data))[lane]); - - T tmp = wf->ldsChunk->read(vaddr); - (*amo_op)(reinterpret_cast(&tmp)); - wf->ldsChunk->write(vaddr, tmp); - (reinterpret_cast(gpuDynInst->d_data))[lane] = tmp; - } - } - } - } - - void - calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr) - { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - gpuDynInst->addr.at(lane) = addr[lane]; - } - } - gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask); - } - - // first instruction DWORD - InFmt_FLAT instData; - // second instruction DWORD - InFmt_FLAT_1 extData; - }; // Inst_FLAT -} // namespace Gcn3ISA -} // namespace gem5 - -#endif // __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__ diff --git a/src/arch/amdgpu/gcn3/isa.cc b/src/arch/amdgpu/gcn3/isa.cc deleted file mode 100644 index 385a0f0901..0000000000 --- a/src/arch/amdgpu/gcn3/isa.cc +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2016-2021 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "arch/amdgpu/gcn3/gpu_isa.hh" - -#include - -#include "gpu-compute/gpu_static_inst.hh" -#include "gpu-compute/wavefront.hh" - -namespace gem5 -{ - -namespace Gcn3ISA -{ - GPUISA::GPUISA(Wavefront &wf) : wavefront(wf), m0(0) - { - } - - ScalarRegU32 - GPUISA::readMiscReg(int opIdx) const - { - switch (opIdx) { - case REG_M0: - return m0; - case REG_ZERO: - return 0; - case REG_SCC: - return statusReg.SCC; - default: - fatal("attempting to read from unsupported or non-readable " - "register. selector val: %i\n", opIdx); - return 0; - } - } - - void - GPUISA::writeMiscReg(int opIdx, ScalarRegU32 operandVal) - { - switch (opIdx) { - case REG_M0: - m0 = operandVal; - break; - case REG_SCC: - statusReg.SCC = operandVal ? 1 : 0; - break; - default: - fatal("attempting to write to an unsupported or non-writable " - "register. 
selector val: %i\n", opIdx); - break; - } - } - - void - GPUISA::advancePC(GPUDynInstPtr gpuDynInst) - { - wavefront.pc(wavefront.pc() - + gpuDynInst->staticInstruction()->instSize()); - } - - const std::array - GPUISA::posConstRegs = { { - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, - 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, - 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64 - } }; - - const std::array - GPUISA::negConstRegs = { { - -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, - -16 - } }; -} // namespace Gcn3ISA -} // namespace gem5 diff --git a/src/arch/amdgpu/gcn3/operand.hh b/src/arch/amdgpu/gcn3/operand.hh deleted file mode 100644 index 769f28a8a8..0000000000 --- a/src/arch/amdgpu/gcn3/operand.hh +++ /dev/null @@ -1,752 +0,0 @@ -/* - * Copyright (c) 2017-2021 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __ARCH_GCN3_OPERAND_HH__ -#define __ARCH_GCN3_OPERAND_HH__ - -#include - -#include "arch/amdgpu/gcn3/gpu_registers.hh" -#include "arch/generic/vec_reg.hh" -#include "gpu-compute/scalar_register_file.hh" -#include "gpu-compute/vector_register_file.hh" -#include "gpu-compute/wavefront.hh" - -namespace gem5 -{ - -/** - * classes that represnt vector/scalar operands in GCN3 ISA. these classes - * wrap the generic vector register type (i.e., src/arch/generic/vec_reg.hh) - * and allow them to be manipulated in ways that are unique to GCN3 insts. - */ - -namespace Gcn3ISA -{ - /** - * convenience traits so we can automatically infer the correct FP type - * without looking at the number of dwords (i.e., to determine if we - * need a float or a double when creating FP constants). - */ - template struct OpTraits { typedef float FloatT; }; - template<> struct OpTraits { typedef double FloatT; }; - template<> struct OpTraits { typedef double FloatT; }; - - class Operand - { - public: - Operand() = delete; - - Operand(GPUDynInstPtr gpuDynInst, int opIdx) - : _gpuDynInst(gpuDynInst), _opIdx(opIdx) - { - assert(_gpuDynInst); - assert(_opIdx >= 0); - } - - /** - * read from and write to the underlying register(s) that - * this operand is referring to. 
- */ - virtual void read() = 0; - virtual void write() = 0; - - protected: - /** - * instruction object that owns this operand - */ - GPUDynInstPtr _gpuDynInst; - /** - * op selector value for this operand. note that this is not - * the same as the register file index, be it scalar or vector. - * this could refer to inline constants, system regs, or even - * special values. - */ - int _opIdx; - }; - - template - class ScalarOperand; - - template - class VecOperand final : public Operand - { - static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords, - "Incorrect number of DWORDS for GCN3 operand."); - - public: - VecOperand() = delete; - - VecOperand(GPUDynInstPtr gpuDynInst, int opIdx) - : Operand(gpuDynInst, opIdx), scalar(false), absMod(false), - negMod(false), scRegData(gpuDynInst, _opIdx), - vrfData{{ nullptr }} - { - vecReg.zero(); - } - - ~VecOperand() - { - } - - /** - * certain vector operands can read from the vrf/srf or constants. - * we use this method to first determine the type of the operand, - * then we read from the appropriate source. if vector we read - * directly from the vrf. if scalar, we read in the data through - * the scalar operand component. this should only be used for VSRC - * operands. - */ - void - readSrc() - { - if (isVectorReg(_opIdx)) { - _opIdx = opSelectorToRegIdx(_opIdx, _gpuDynInst->wavefront() - ->reservedScalarRegs); - read(); - } else { - readScalar(); - } - } - - /** - * read from the vrf. this should only be used by vector inst - * source operands that are explicitly vector (i.e., VSRC). 
- */ - void - read() override - { - assert(_gpuDynInst); - assert(_gpuDynInst->wavefront()); - assert(_gpuDynInst->computeUnit()); - Wavefront *wf = _gpuDynInst->wavefront(); - ComputeUnit *cu = _gpuDynInst->computeUnit(); - - for (auto i = 0; i < NumDwords; ++i) { - int vgprIdx = cu->registerManager->mapVgpr(wf, _opIdx + i); - vrfData[i] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx); - - DPRINTF(GPUVRF, "Read v[%d]\n", vgprIdx); - cu->vrf[wf->simdId]->printReg(wf, vgprIdx); - } - - if (NumDwords == 1) { - assert(vrfData[0]); - auto vgpr = vecReg.template as(); - auto reg_file_vgpr = vrfData[0]->template as(); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - std::memcpy((void*)&vgpr[lane], - (void*)®_file_vgpr[lane], sizeof(DataType)); - } - } else if (NumDwords == 2) { - assert(vrfData[0]); - assert(vrfData[1]); - auto vgpr = vecReg.template as(); - auto reg_file_vgpr0 = vrfData[0]->template as(); - auto reg_file_vgpr1 = vrfData[1]->template as(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - VecElemU64 tmp_val(0); - ((VecElemU32*)&tmp_val)[0] = reg_file_vgpr0[lane]; - ((VecElemU32*)&tmp_val)[1] = reg_file_vgpr1[lane]; - vgpr[lane] = tmp_val; - } - } - } - - /** - * write to the vrf. we maintain a copy of the underlying vector - * reg(s) for this operand (i.e., vrfData/scRegData), as well as a - * temporary vector register representation (i.e., vecReg) of the - * vector register, which allows the execute() methods of instructions - * to easily write their operand data using operator[] regardless of - * their size. after the result is calculated we use write() to write - * the data to the actual register file storage. this allows us to do - * type conversion, etc., in a single call as opposed to doing it - * in each execute() method. 
- */ - void - write() override - { - assert(_gpuDynInst); - assert(_gpuDynInst->wavefront()); - assert(_gpuDynInst->computeUnit()); - Wavefront *wf = _gpuDynInst->wavefront(); - ComputeUnit *cu = _gpuDynInst->computeUnit(); - VectorMask &exec_mask = _gpuDynInst->isLoad() - ? _gpuDynInst->exec_mask : wf->execMask(); - - if (NumDwords == 1) { - int vgprIdx = cu->registerManager->mapVgpr(wf, _opIdx); - vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx); - assert(vrfData[0]); - auto reg_file_vgpr = vrfData[0]->template as(); - auto vgpr = vecReg.template as(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (exec_mask[lane] || _gpuDynInst->ignoreExec()) { - std::memcpy((void*)®_file_vgpr[lane], - (void*)&vgpr[lane], sizeof(DataType)); - } - } - - DPRINTF(GPUVRF, "Write v[%d]\n", vgprIdx); - cu->vrf[wf->simdId]->printReg(wf, vgprIdx); - } else if (NumDwords == 2) { - int vgprIdx0 = cu->registerManager->mapVgpr(wf, _opIdx); - int vgprIdx1 = cu->registerManager->mapVgpr(wf, _opIdx + 1); - vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx0); - vrfData[1] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx1); - assert(vrfData[0]); - assert(vrfData[1]); - auto reg_file_vgpr0 = vrfData[0]->template as(); - auto reg_file_vgpr1 = vrfData[1]->template as(); - auto vgpr = vecReg.template as(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (exec_mask[lane] || _gpuDynInst->ignoreExec()) { - reg_file_vgpr0[lane] = ((VecElemU32*)&vgpr[lane])[0]; - reg_file_vgpr1[lane] = ((VecElemU32*)&vgpr[lane])[1]; - } - } - - DPRINTF(GPUVRF, "Write v[%d:%d]\n", vgprIdx0, vgprIdx1); - cu->vrf[wf->simdId]->printReg(wf, vgprIdx0); - cu->vrf[wf->simdId]->printReg(wf, vgprIdx1); - } - } - - void - negModifier() - { - negMod = true; - } - - void - absModifier() - { - absMod = true; - } - - /** - * getter [] operator. 
only enable if this operand is constant - * (i.e, a source operand) and if it can be represented using - * primitive types (i.e., 8b to 64b primitives). - */ - template - typename std::enable_if_t - operator[](size_t idx) const - { - assert(idx < NumVecElemPerVecReg); - - if (scalar) { - DataType ret_val = scRegData.rawData(); - - if (absMod) { - assert(std::is_floating_point_v); - ret_val = std::fabs(ret_val); - } - - if (negMod) { - assert(std::is_floating_point_v); - ret_val = -ret_val; - } - - return ret_val; - } else { - auto vgpr = vecReg.template as(); - DataType ret_val = vgpr[idx]; - - if (absMod) { - assert(std::is_floating_point_v); - ret_val = std::fabs(ret_val); - } - - if (negMod) { - assert(std::is_floating_point_v); - ret_val = -ret_val; - } - - return ret_val; - } - } - - /** - * setter [] operator. only enable if this operand is non-constant - * (i.e, a destination operand) and if it can be represented using - * primitive types (i.e., 8b to 64b primitives). - */ - template - typename std::enable_if_t - operator[](size_t idx) - { - assert(!scalar); - assert(idx < NumVecElemPerVecReg); - - return vecReg.template as()[idx]; - } - - private: - /** - * if we determine that this operand is a scalar (reg or constant) - * then we read the scalar data into the scalar operand data member. - */ - void - readScalar() - { - scalar = true; - scRegData.read(); - } - - using VecRegCont = - VecRegContainer; - - /** - * whether this operand a scalar or not. - */ - bool scalar; - /** - * absolute value and negative modifiers. VOP3 instructions - * may indicate that their input/output operands must be - * modified, either by taking the absolute value or negating - * them. these bools indicate which modifier, if any, to use. - */ - bool absMod; - bool negMod; - /** - * this holds all the operand data in a single vector register - * object (i.e., if an operand is 64b, this will hold the data - * from both registers the operand is using). 
- */ - VecRegCont vecReg; - /** - * for src operands that read scalars (i.e., scalar regs or - * a scalar constant). - */ - ScalarOperand scRegData; - /** - * pointers to the underlyding registers (i.e., the actual - * registers in the register file). - */ - std::array vrfData; - }; - - template - class ScalarOperand final : public Operand - { - static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords, - "Incorrect number of DWORDS for GCN3 operand."); - public: - ScalarOperand() = delete; - - ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx) - : Operand(gpuDynInst, opIdx) - { - std::memset(srfData.data(), 0, NumDwords * sizeof(ScalarRegU32)); - } - - ~ScalarOperand() - { - } - - /** - * we store scalar data in a std::array, however if we need the - * full operand data we use this method to copy all elements of - * the scalar operand data to a single primitive container. only - * useful for 8b to 64b primitive types, as they are the only types - * that we need to perform computation on. 
- */ - template - typename std::enable_if_t - rawData() const - { - assert(sizeof(DataType) <= sizeof(srfData)); - DataType raw_data((DataType)0); - std::memcpy((void*)&raw_data, (void*)srfData.data(), - sizeof(DataType)); - - return raw_data; - } - - void* - rawDataPtr() - { - return (void*)srfData.data(); - } - - void - read() override - { - Wavefront *wf = _gpuDynInst->wavefront(); - ComputeUnit *cu = _gpuDynInst->computeUnit(); - - if (!isScalarReg(_opIdx)) { - readSpecialVal(); - } else { - for (auto i = 0; i < NumDwords; ++i) { - int sgprIdx = regIdx(i); - srfData[i] = cu->srf[wf->simdId]->read(sgprIdx); - DPRINTF(GPUSRF, "Read s[%d]\n", sgprIdx); - cu->srf[wf->simdId]->printReg(wf, sgprIdx); - } - } - } - - void - write() override - { - Wavefront *wf = _gpuDynInst->wavefront(); - ComputeUnit *cu = _gpuDynInst->computeUnit(); - - if (!isScalarReg(_opIdx)) { - if (_opIdx == REG_EXEC_LO) { - ScalarRegU64 new_exec_mask_val - = wf->execMask().to_ullong(); - if (NumDwords == 1) { - std::memcpy((void*)&new_exec_mask_val, - (void*)srfData.data(), sizeof(VecElemU32)); - } else if (NumDwords == 2) { - std::memcpy((void*)&new_exec_mask_val, - (void*)srfData.data(), sizeof(VecElemU64)); - } else { - panic("Trying to write more than 2 DWORDS to EXEC\n"); - } - VectorMask new_exec_mask(new_exec_mask_val); - wf->execMask() = new_exec_mask; - DPRINTF(GPUSRF, "Write EXEC\n"); - DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val); - } else if (_opIdx == REG_EXEC_HI) { - /** - * If we're writing only the upper half of the EXEC mask - * this ought to be a single dword operand. 
- */ - assert(NumDwords == 1); - ScalarRegU32 new_exec_mask_hi_val(0); - ScalarRegU64 new_exec_mask_val - = wf->execMask().to_ullong(); - std::memcpy((void*)&new_exec_mask_hi_val, - (void*)srfData.data(), sizeof(new_exec_mask_hi_val)); - replaceBits(new_exec_mask_val, 63, 32, - new_exec_mask_hi_val); - VectorMask new_exec_mask(new_exec_mask_val); - wf->execMask() = new_exec_mask; - DPRINTF(GPUSRF, "Write EXEC\n"); - DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val); - } else { - _gpuDynInst->writeMiscReg(_opIdx, srfData[0]); - } - } else { - for (auto i = 0; i < NumDwords; ++i) { - int sgprIdx = regIdx(i); - auto &sgpr = cu->srf[wf->simdId]->readWriteable(sgprIdx); - if (_gpuDynInst->isLoad()) { - assert(sizeof(DataType) <= sizeof(ScalarRegU64)); - sgpr = reinterpret_cast( - _gpuDynInst->scalar_data)[i]; - } else { - sgpr = srfData[i]; - } - DPRINTF(GPUSRF, "Write s[%d]\n", sgprIdx); - cu->srf[wf->simdId]->printReg(wf, sgprIdx); - } - } - } - - /** - * bit access to scalar data. primarily used for setting vcc bits. - */ - template - typename std::enable_if_t - setBit(int bit, int bit_val) - { - DataType &sgpr = *((DataType*)srfData.data()); - replaceBits(sgpr, bit, bit_val); - } - - template - typename std::enable_if_t - operator=(DataType rhs) - { - std::memcpy((void*)srfData.data(), (void*)&rhs, sizeof(DataType)); - return *this; - } - - private: - /** - * we have determined that we are not reading our scalar operand data - * from the register file, so here we figure out which special value - * we are reading (i.e., float constant, int constant, inline - * constant, or various other system registers (e.g., exec mask). 
- */ - void - readSpecialVal() - { - assert(NumDwords == 1 || NumDwords == 2); - - switch(_opIdx) { - case REG_EXEC_LO: - { - if (NumDwords == 1) { - ScalarRegU32 exec_mask = _gpuDynInst->wavefront()-> - execMask().to_ulong(); - std::memcpy((void*)srfData.data(), (void*)&exec_mask, - sizeof(exec_mask)); - DPRINTF(GPUSRF, "Read EXEC\n"); - DPRINTF(GPUSRF, "EXEC = %#x\n", exec_mask); - } else { - assert(NumDwords == 2); - ScalarRegU64 exec_mask = _gpuDynInst->wavefront()-> - execMask().to_ullong(); - std::memcpy((void*)srfData.data(), (void*)&exec_mask, - sizeof(exec_mask)); - DPRINTF(GPUSRF, "Read EXEC\n"); - DPRINTF(GPUSRF, "EXEC = %#x\n", exec_mask); - } - } - break; - case REG_EXEC_HI: - { - /** - * If we're reading only the upper half of the EXEC mask - * this ought to be a single dword operand. - */ - assert(NumDwords == 1); - ScalarRegU64 exec_mask = _gpuDynInst->wavefront() - ->execMask().to_ullong(); - - ScalarRegU32 exec_mask_hi = bits(exec_mask, 63, 32); - std::memcpy((void*)srfData.data(), (void*)&exec_mask_hi, - sizeof(exec_mask_hi)); - DPRINTF(GPUSRF, "Read EXEC_HI\n"); - DPRINTF(GPUSRF, "EXEC_HI = %#x\n", exec_mask_hi); - } - break; - case REG_SRC_SWDA: - case REG_SRC_DPP: - case REG_SRC_LITERAL: - assert(NumDwords == 1); - srfData[0] = _gpuDynInst->srcLiteral(); - break; - case REG_POS_HALF: - { - typename OpTraits::FloatT pos_half = 0.5; - std::memcpy((void*)srfData.data(), (void*)&pos_half, - sizeof(pos_half)); - - } - break; - case REG_NEG_HALF: - { - typename OpTraits::FloatT neg_half = -0.5; - std::memcpy((void*)srfData.data(), (void*)&neg_half, - sizeof(neg_half)); - } - break; - case REG_POS_ONE: - { - typename OpTraits::FloatT pos_one = 1.0; - std::memcpy(srfData.data(), &pos_one, sizeof(pos_one)); - } - break; - case REG_NEG_ONE: - { - typename OpTraits::FloatT neg_one = -1.0; - std::memcpy(srfData.data(), &neg_one, sizeof(neg_one)); - } - break; - case REG_POS_TWO: - { - typename OpTraits::FloatT pos_two = 2.0; - std::memcpy(srfData.data(), 
&pos_two, sizeof(pos_two)); - } - break; - case REG_NEG_TWO: - { - typename OpTraits::FloatT neg_two = -2.0; - std::memcpy(srfData.data(), &neg_two, sizeof(neg_two)); - } - break; - case REG_POS_FOUR: - { - typename OpTraits::FloatT pos_four = 4.0; - std::memcpy(srfData.data(), &pos_four, sizeof(pos_four)); - } - break; - case REG_NEG_FOUR: - { - typename OpTraits::FloatT neg_four = -4.0; - std::memcpy((void*)srfData.data(), (void*)&neg_four , - sizeof(neg_four)); - } - break; - case REG_PI: - { - assert(sizeof(DataType) == sizeof(ScalarRegF64) - || sizeof(DataType) == sizeof(ScalarRegF32)); - - const ScalarRegU32 pi_u32(0x3e22f983UL); - const ScalarRegU64 pi_u64(0x3fc45f306dc9c882ULL); - - if (sizeof(DataType) == sizeof(ScalarRegF64)) { - std::memcpy((void*)srfData.data(), - (void*)&pi_u64, sizeof(pi_u64)); - } else { - std::memcpy((void*)srfData.data(), - (void*)&pi_u32, sizeof(pi_u32)); - } - } - break; - default: - { - assert(sizeof(DataType) <= sizeof(srfData)); - DataType misc_val(0); - if (isConstVal(_opIdx)) { - misc_val = (DataType)_gpuDynInst - ->readConstVal(_opIdx); - } else { - misc_val = (DataType)_gpuDynInst->readMiscReg(_opIdx); - } - std::memcpy((void*)srfData.data(), (void*)&misc_val, - sizeof(DataType)); - } - } - } - - /** - * for scalars we need to do some extra work to figure out how to - * map the op selector to the sgpr idx because some op selectors - * do not map directly to the srf (i.e., vcc/flat_scratch). 
- */ - int - regIdx(int dword) const - { - Wavefront *wf = _gpuDynInst->wavefront(); - ComputeUnit *cu = _gpuDynInst->computeUnit(); - int sgprIdx(-1); - - if (_opIdx == REG_VCC_HI) { - sgprIdx = cu->registerManager - ->mapSgpr(wf, wf->reservedScalarRegs - 1 + dword); - } else if (_opIdx == REG_VCC_LO) { - sgprIdx = cu->registerManager - ->mapSgpr(wf, wf->reservedScalarRegs - 2 + dword); - } else if (_opIdx == REG_FLAT_SCRATCH_HI) { - sgprIdx = cu->registerManager - ->mapSgpr(wf, wf->reservedScalarRegs - 3 + dword); - } else if (_opIdx == REG_FLAT_SCRATCH_LO) { - assert(NumDwords == 1); - sgprIdx = cu->registerManager - ->mapSgpr(wf, wf->reservedScalarRegs - 4 + dword); - } else { - sgprIdx = cu->registerManager->mapSgpr(wf, _opIdx + dword); - } - - assert(sgprIdx > -1); - - return sgprIdx; - } - - /** - * in GCN3 each register is represented as a 32b unsigned value, - * however operands may require up to 16 registers, so we store - * all the individual 32b components here. for sub-dword operand - * we still consider them to be 1 dword because the minimum size - * of a register is 1 dword. this class will take care to do the - * proper packing/unpacking of sub-dword operands. 
- */ - std::array srfData; - }; - - // typedefs for the various sizes/types of scalar operands - using ScalarOperandU8 = ScalarOperand; - using ScalarOperandI8 = ScalarOperand; - using ScalarOperandU16 = ScalarOperand; - using ScalarOperandI16 = ScalarOperand; - using ScalarOperandU32 = ScalarOperand; - using ScalarOperandI32 = ScalarOperand; - using ScalarOperandF32 = ScalarOperand; - using ScalarOperandU64 = ScalarOperand; - using ScalarOperandI64 = ScalarOperand; - using ScalarOperandF64 = ScalarOperand; - using ScalarOperandU128 = ScalarOperand; - using ScalarOperandU256 = ScalarOperand; - using ScalarOperandU512 = ScalarOperand; - // non-writeable versions of scalar operands - using ConstScalarOperandU8 = ScalarOperand; - using ConstScalarOperandI8 = ScalarOperand; - using ConstScalarOperandU16 = ScalarOperand; - using ConstScalarOperandI16 = ScalarOperand; - using ConstScalarOperandU32 = ScalarOperand; - using ConstScalarOperandI32 = ScalarOperand; - using ConstScalarOperandF32 = ScalarOperand; - using ConstScalarOperandU64 = ScalarOperand; - using ConstScalarOperandI64 = ScalarOperand; - using ConstScalarOperandF64 = ScalarOperand; - using ConstScalarOperandU128 = ScalarOperand; - using ConstScalarOperandU256 = ScalarOperand; - using ConstScalarOperandU512 = ScalarOperand; - // typedefs for the various sizes/types of vector operands - using VecOperandU8 = VecOperand; - using VecOperandI8 = VecOperand; - using VecOperandU16 = VecOperand; - using VecOperandI16 = VecOperand; - using VecOperandU32 = VecOperand; - using VecOperandI32 = VecOperand; - using VecOperandF32 = VecOperand; - using VecOperandU64 = VecOperand; - using VecOperandF64 = VecOperand; - using VecOperandI64 = VecOperand; - using VecOperandU96 = VecOperand; - using VecOperandU128 = VecOperand; - using VecOperandU256 = VecOperand; - using VecOperandU512 = VecOperand; - // non-writeable versions of vector operands - using ConstVecOperandU8 = VecOperand; - using ConstVecOperandI8 = VecOperand; - 
using ConstVecOperandU16 = VecOperand; - using ConstVecOperandI16 = VecOperand; - using ConstVecOperandU32 = VecOperand; - using ConstVecOperandI32 = VecOperand; - using ConstVecOperandF32 = VecOperand; - using ConstVecOperandU64 = VecOperand; - using ConstVecOperandI64 = VecOperand; - using ConstVecOperandF64 = VecOperand; - using ConstVecOperandU96 = VecOperand; - using ConstVecOperandU128 = VecOperand; - using ConstVecOperandU256 = VecOperand; - using ConstVecOperandU512 = VecOperand; -} - -} // namespace gem5 - -#endif // __ARCH_GCN3_OPERAND_HH__ diff --git a/src/arch/amdgpu/gcn3/registers.cc b/src/arch/amdgpu/gcn3/registers.cc deleted file mode 100644 index 7f1d0dba37..0000000000 --- a/src/arch/amdgpu/gcn3/registers.cc +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "arch/amdgpu/gcn3/gpu_registers.hh" - -namespace gem5 -{ - -namespace Gcn3ISA -{ - std::string - opSelectorToRegSym(int idx, int numRegs) - { - std::string reg_sym; - - // we have an SGPR - if (idx <= REG_SGPR_MAX) { - if (numRegs > 1) - reg_sym = "s[" + std::to_string(idx) + ":" + - std::to_string(idx + numRegs - 1) + "]"; - else - reg_sym = "s" + std::to_string(idx); - return reg_sym; - } else if (idx >= REG_VGPR_MIN && idx <= REG_VGPR_MAX) { - if (numRegs > 1) - reg_sym = "v[" + std::to_string(idx - REG_VGPR_MIN) + ":" + - std::to_string(idx - REG_VGPR_MIN + numRegs - 1) + "]"; - else - reg_sym = "v" + std::to_string(idx - REG_VGPR_MIN); - return reg_sym; - } else if (idx >= REG_INT_CONST_POS_MIN && - idx <= REG_INT_CONST_POS_MAX) { - reg_sym = std::to_string(idx - REG_INT_CONST_POS_MIN + 1); - return reg_sym; - } else if (idx >= REG_INT_CONST_NEG_MIN && - idx <= REG_INT_CONST_NEG_MAX) { - int inline_val = -1 - (idx - REG_INT_CONST_NEG_MIN); - reg_sym = std::to_string(inline_val); - return reg_sym; - } - - switch (idx) { - case REG_FLAT_SCRATCH_LO: - reg_sym = "flat_scratch_lo"; - break; - case REG_FLAT_SCRATCH_HI: - reg_sym = "flat_scratch_hi"; - break; - case REG_VCC_LO: - reg_sym = "vcc_lo"; - break; - case REG_VCC_HI: - reg_sym = "vcc_hi"; - break; - case REG_M0: - reg_sym = "m0"; - break; - case REG_EXEC_LO: - reg_sym = "exec"; - break; - case REG_ZERO: - reg_sym = "0"; - break; - case REG_POS_HALF: - 
reg_sym = "0.5"; - break; - case REG_NEG_HALF: - reg_sym = "-0.5"; - break; - case REG_POS_ONE: - reg_sym = "1"; - break; - case REG_NEG_ONE: - reg_sym = "-1"; - break; - case REG_POS_TWO: - reg_sym = "2"; - break; - case REG_NEG_TWO: - reg_sym = "-2"; - break; - case REG_POS_FOUR: - reg_sym = "4"; - break; - case REG_NEG_FOUR: - reg_sym = "-4"; - break; - default: - fatal("GCN3 ISA instruction has unknown register index %u\n", idx); - break; - } - - return reg_sym; - } - - int - opSelectorToRegIdx(int idx, int numScalarRegs) - { - int regIdx = -1; - - if (idx <= REG_SGPR_MAX) { - regIdx = idx; - } else if (idx >= REG_VGPR_MIN && idx <= REG_VGPR_MAX) { - regIdx = idx - REG_VGPR_MIN; - } else if (idx == REG_VCC_LO) { - /** - * the VCC register occupies the two highest numbered - * SRF entries. VCC is typically indexed by specifying - * VCC_LO (simply called VCC) in the instruction encoding - * and reading it as a 64b value so we only return the - * index to the lower half of the VCC register. - * - * VCC_LO = s[NUM_SGPRS - 2] - * VCC_HI = s[NUM_SGPRS - 1] - * - */ - regIdx = numScalarRegs - 2; - } else if (idx == REG_VCC_HI) { - regIdx = numScalarRegs - 1; - } else if (idx == REG_FLAT_SCRATCH_LO) { - /** - * the FLAT_SCRATCH register occupies the two SRF entries - * just below VCC. FLAT_SCRATCH is typically indexed by - * specifying FLAT_SCRATCH_LO (simply called FLAT_SCRATCH) - * in the instruction encoding and reading it as a 64b value - * so we only return the index to the lower half of the - * FLAT_SCRATCH register. 
- * - * FLAT_SCRATCH_LO = s[NUM_SGPRS - 4] - * FLAT_SCRATCH_HI = s[NUM_SGPRS - 3] - * - */ - regIdx = numScalarRegs - 4; - } else if (idx == REG_FLAT_SCRATCH_HI) { - regIdx = numScalarRegs - 3; - } - - return regIdx; - } - - bool - isPosConstVal(int opIdx) - { - bool is_pos_const_val = (opIdx >= REG_INT_CONST_POS_MIN - && opIdx <= REG_INT_CONST_POS_MAX); - - return is_pos_const_val; - } - - bool - isNegConstVal(int opIdx) - { - bool is_neg_const_val = (opIdx >= REG_INT_CONST_NEG_MIN - && opIdx <= REG_INT_CONST_NEG_MAX); - - return is_neg_const_val; - } - - bool - isConstVal(int opIdx) - { - bool is_const_val = isPosConstVal(opIdx) || isNegConstVal(opIdx); - return is_const_val; - } - - bool - isLiteral(int opIdx) - { - return opIdx == REG_SRC_LITERAL; - } - - bool - isExecMask(int opIdx) - { - return opIdx == REG_EXEC_LO || opIdx == REG_EXEC_HI; - } - - bool - isVccReg(int opIdx) - { - return opIdx == REG_VCC_LO || opIdx == REG_VCC_HI; - } - - bool - isFlatScratchReg(int opIdx) - { - return opIdx == REG_FLAT_SCRATCH_LO || opIdx == REG_FLAT_SCRATCH_HI; - } - - bool - isScalarReg(int opIdx) - { - // FLAT_SCRATCH and VCC are stored in an SGPR pair - if (opIdx <= REG_SGPR_MAX || opIdx == REG_FLAT_SCRATCH_LO || - opIdx == REG_FLAT_SCRATCH_HI || opIdx == REG_VCC_LO || - opIdx == REG_VCC_HI) { - return true; - } - - return false; - } - - bool - isVectorReg(int opIdx) - { - if (opIdx >= REG_VGPR_MIN && opIdx <= REG_VGPR_MAX) - return true; - - return false; - } - -} // namespace Gcn3ISA -} // namespace gem5 diff --git a/src/arch/amdgpu/vega/SConscript b/src/arch/amdgpu/vega/SConscript index 9c6a01bf81..f40bac2a1e 100644 --- a/src/arch/amdgpu/vega/SConscript +++ b/src/arch/amdgpu/vega/SConscript @@ -49,11 +49,31 @@ Source('tlb_coalescer.cc') DebugFlag('GPUPTWalker', 'Debug flag for GPU page table walker') if env['CONF']['TARGET_GPU_ISA'] == 'vega': - Source('decoder.cc') + Source('gpu_decoder.cc') Source('insts/gpu_static_inst.cc') - Source('insts/instructions.cc') 
Source('insts/op_encodings.cc') - Source('isa.cc') - Source('registers.cc') + Source('gpu_isa.cc') + Source('gpu_registers.cc') + + Source('insts/sop2.cc') + Source('insts/sopk.cc') + Source('insts/sop1.cc') + Source('insts/sopc.cc') + Source('insts/sopp.cc') + Source('insts/smem.cc') + Source('insts/vop2.cc') + Source('insts/vop1.cc') + Source('insts/vopc.cc') + Source('insts/vinterp.cc') + Source('insts/vop3.cc') + Source('insts/vop3_cmp.cc') + Source('insts/ds.cc') + Source('insts/mubuf.cc') + Source('insts/mtbuf.cc') + Source('insts/mimg.cc') + Source('insts/exp.cc') + Source('insts/flat.cc') + Source('insts/vop3p.cc') + Source('insts/vop3p_mai.cc') DebugFlag('VEGA', 'Debug flag for VEGA GPU ISA') diff --git a/src/arch/amdgpu/vega/decoder.cc b/src/arch/amdgpu/vega/gpu_decoder.cc similarity index 96% rename from src/arch/amdgpu/vega/decoder.cc rename to src/arch/amdgpu/vega/gpu_decoder.cc index 065f8c8493..43c33e44cc 100644 --- a/src/arch/amdgpu/vega/decoder.cc +++ b/src/arch/amdgpu/vega/gpu_decoder.cc @@ -29,11 +29,13 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include "arch/amdgpu/vega/gpu_decoder.hh" + #include -#include "arch/amdgpu/vega/gpu_decoder.hh" #include "arch/amdgpu/vega/insts/gpu_static_inst.hh" #include "arch/amdgpu/vega/insts/instructions.hh" +#include "arch/amdgpu/vega/insts/vop3p.hh" namespace gem5 { @@ -498,10 +500,10 @@ namespace VegaISA &Decoder::subDecode_OP_FLAT, &Decoder::subDecode_OP_FLAT, &Decoder::subDecode_OP_FLAT, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, + &Decoder::subDecode_OP_FLAT, + &Decoder::subDecode_OP_FLAT, + &Decoder::subDecode_OP_FLAT, + &Decoder::subDecode_OP_FLAT, &Decoder::subDecode_OP_MUBUF, &Decoder::subDecode_OP_MUBUF, &Decoder::subDecode_OP_MUBUF, @@ -884,7 +886,7 @@ namespace VegaISA &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, - &Decoder::decode_invalid, + &Decoder::decode_OPU_VOP3__V_FMAC_F32, &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, @@ -1089,7 +1091,7 @@ namespace VegaISA &Decoder::decode_OPU_VOP3__V_MAD_I16, &Decoder::decode_OPU_VOP3__V_FMA_F16, &Decoder::decode_OPU_VOP3__V_DIV_FIXUP_F16, - &Decoder::decode_invalid, + &Decoder::decode_OPU_VOP3__V_LSHL_ADD_U64, &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, @@ -1236,14 +1238,14 @@ namespace VegaISA &Decoder::decode_OPU_VOP3__V_CVT_PK_I16_I32, &Decoder::decode_OPU_VOP3__V_PKNORM_I16_F16, &Decoder::decode_OPU_VOP3__V_PKNORM_U16_F16, + &Decoder::decode_invalid, &Decoder::decode_OPU_VOP3__V_ADD_I32, &Decoder::decode_OPU_VOP3__V_SUB_I32, &Decoder::decode_OPU_VOP3__V_ADD_I16, &Decoder::decode_OPU_VOP3__V_SUB_I16, &Decoder::decode_OPU_VOP3__V_PACK_B32_F16, &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, + &Decoder::decode_OPU_VOP3__V_CVT_PK_FP8_F32, &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, @@ -1678,9 +1680,9 @@ namespace VegaISA &Decoder::decode_OP_FLAT__FLAT_ATOMIC_DEC, 
&Decoder::decode_invalid, &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, + &Decoder::decode_OP_FLAT__FLAT_ATOMIC_ADD_F64, + &Decoder::decode_OP_FLAT__FLAT_ATOMIC_MIN_F64, + &Decoder::decode_OP_FLAT__FLAT_ATOMIC_MAX_F64, &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, @@ -1807,11 +1809,11 @@ namespace VegaISA &Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_XOR, &Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_INC, &Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_DEC, + &Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD_F32, &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, + &Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD_F64, + &Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_MIN_F64, + &Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_MAX_F64, &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, @@ -3116,7 +3118,7 @@ namespace VegaISA &Decoder::decode_OP_VOP1__V_CLREXCP, &Decoder::decode_invalid, &Decoder::decode_OP_VOP1__V_SCREEN_PARTITION_4SE_B32, - &Decoder::decode_invalid, + &Decoder::decode_OP_VOP1__V_MOV_B64, &Decoder::decode_OP_VOP1__V_CVT_F16_U16, &Decoder::decode_OP_VOP1__V_CVT_F16_I16, &Decoder::decode_OP_VOP1__V_CVT_U16_F16, @@ -3142,7 +3144,7 @@ namespace VegaISA &Decoder::decode_OP_VOP1__V_SAT_PK_U8_I16, &Decoder::decode_invalid, &Decoder::decode_OP_VOP1__V_SWAP_B32, - &Decoder::decode_invalid, + &Decoder::decode_OP_VOP1__V_ACCVGPR_MOV_B32, &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, @@ -3613,12 +3615,23 @@ namespace VegaISA &Decoder::decode_OP_VOP3P__V_MAD_MIX_F32, &Decoder::decode_OP_VOP3P__V_MAD_MIXLO_F16, &Decoder::decode_OP_VOP3P__V_MAD_MIXHI_F16, + &Decoder::decode_OP_VOP3P__V_DOT2_F32_F16, &Decoder::decode_invalid, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_DOT2_I32_I16, + &Decoder::decode_OP_VOP3P__V_DOT2_U32_U16, + 
&Decoder::decode_OP_VOP3P__V_DOT4_I32_I8, + &Decoder::decode_OP_VOP3P__V_DOT4_U32_U8, + &Decoder::decode_OP_VOP3P__V_DOT8_I32_I4, + &Decoder::decode_OP_VOP3P__V_DOT8_U32_U4, &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_PK_FMA_F32, + &Decoder::decode_OP_VOP3P__V_PK_MUL_F32, + &Decoder::decode_OP_VOP3P__V_PK_ADD_F32, + &Decoder::decode_OP_VOP3P__V_PK_MOV_B32, &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, @@ -3631,81 +3644,70 @@ namespace VegaISA &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X1_2B_F32, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X1_4B_F32, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_4X4X1_16B_F32, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X2_F32, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X4_F32, &Decoder::decode_invalid, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X4_2B_F16, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X4_4B_F16, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_4X4X4_16B_F16, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X8_F16, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X16_F16, &Decoder::decode_invalid, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_MFMA_I32_32X32X4_2B_I8, + &Decoder::decode_OP_VOP3P__V_MFMA_I32_16X16X4_4B_I8, + &Decoder::decode_OP_VOP3P__V_MFMA_I32_4X4X4_16B_I8, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_MFMA_I32_32X32X8_I8, + &Decoder::decode_OP_VOP3P__V_MFMA_I32_16X16X16_I8, + &Decoder::decode_OP_VOP3P__V_MFMA_I32_32X32X16_I8, + &Decoder::decode_OP_VOP3P__V_MFMA_I32_16X16X32_I8, + &Decoder::decode_OP_VOP3P__V_ACCVGPR_READ, + &Decoder::decode_OP_VOP3P__V_ACCVGPR_WRITE, &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X4_2B_BF16, + 
&Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X4_4B_BF16, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_4X4X4_16B_BF16, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X8_BF16, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X16_BF16, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X32_F16, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X16_F16, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X32_BF16, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X16_BF16, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_SMFMAC_I32_16X16X64_I8, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_SMFMAC_I32_32X32X32_I8, &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, 
- &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_MFMA_F64_16X16X4_F64, + &Decoder::decode_OP_VOP3P__V_MFMA_F64_4X4X4_4B_F64, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_BF8_BF8, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_BF8_FP8, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_FP8_BF8, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_FP8_FP8, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_BF8_BF8, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_BF8_FP8, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_FP8_BF8, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_FP8_FP8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_BF8_BF8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_BF8_FP8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_FP8_BF8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_FP8_FP8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_BF8_BF8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_BF8_FP8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_FP8_BF8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_FP8_FP8, }; GPUStaticInst* @@ -4202,8 +4204,7 @@ namespace VegaISA GPUStaticInst* Decoder::decode_OP_VOP2__V_FMAC_F32(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP2__V_FMAC_F32(&iFmt->iFmt_VOP2); } GPUStaticInst* @@ -4216,8 +4217,7 @@ namespace VegaISA GPUStaticInst* Decoder::decode_OP_VOP2__V_XNOR_B32(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP2__V_XNOR_B32(&iFmt->iFmt_VOP2); } GPUStaticInst* @@ -6172,6 +6172,12 @@ namespace VegaISA return new 
Inst_VOP3__V_SUBREV_U32(&iFmt->iFmt_VOP3A); } // decode_OPU_VOP3__V_SUBREV_U32 + GPUStaticInst* + Decoder::decode_OPU_VOP3__V_FMAC_F32(MachInst iFmt) + { + return new Inst_VOP3__V_FMAC_F32(&iFmt->iFmt_VOP3A); + } // decode_OPU_VOP3__V_FMAC_F32 + GPUStaticInst* Decoder::decode_OPU_VOP3__V_NOP(MachInst iFmt) { @@ -7053,6 +7059,12 @@ namespace VegaISA return new Inst_VOP3__V_DIV_FIXUP_F16(&iFmt->iFmt_VOP3A); } + GPUStaticInst* + Decoder::decode_OPU_VOP3__V_LSHL_ADD_U64(MachInst iFmt) + { + return new Inst_VOP3__V_LSHL_ADD_U64(&iFmt->iFmt_VOP3A); + } + GPUStaticInst* Decoder::decode_OPU_VOP3__V_INTERP_P1_F32(MachInst iFmt) { @@ -7289,6 +7301,12 @@ namespace VegaISA return nullptr; } + GPUStaticInst* + Decoder::decode_OPU_VOP3__V_CVT_PK_FP8_F32(MachInst iFmt) + { + return new Inst_VOP3__V_CVT_PK_FP8_F32(&iFmt->iFmt_VOP3A); + } + GPUStaticInst* Decoder::decode_OP_DS__DS_ADD_U32(MachInst iFmt) { @@ -7796,15 +7814,13 @@ namespace VegaISA GPUStaticInst* Decoder::decode_OP_DS__DS_READ_U16_D16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_DS__DS_READ_U16_D16(&iFmt->iFmt_DS); } GPUStaticInst* Decoder::decode_OP_DS__DS_READ_U16_D16_HI(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_DS__DS_READ_U16_D16_HI(&iFmt->iFmt_DS); } GPUStaticInst* @@ -8292,8 +8308,7 @@ namespace VegaISA GPUStaticInst* Decoder::decode_OP_FLAT__FLAT_STORE_SHORT_D16_HI(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_STORE_SHORT_D16_HI(&iFmt->iFmt_FLAT); } GPUStaticInst* @@ -8440,6 +8455,24 @@ namespace VegaISA return new Inst_FLAT__FLAT_ATOMIC_DEC(&iFmt->iFmt_FLAT); } // decode_OP_FLAT__FLAT_ATOMIC_DEC + GPUStaticInst* + Decoder::decode_OP_FLAT__FLAT_ATOMIC_ADD_F64(MachInst iFmt) + { + return new Inst_FLAT__FLAT_ATOMIC_ADD_F64(&iFmt->iFmt_FLAT); + } // decode_OP_FLAT__FLAT_ATOMIC_ADD_F64 + + GPUStaticInst* 
+ Decoder::decode_OP_FLAT__FLAT_ATOMIC_MIN_F64(MachInst iFmt) + { + return new Inst_FLAT__FLAT_ATOMIC_MIN_F64(&iFmt->iFmt_FLAT); + } // decode_OP_FLAT__FLAT_ATOMIC_MIN_F64 + + GPUStaticInst* + Decoder::decode_OP_FLAT__FLAT_ATOMIC_MAX_F64(MachInst iFmt) + { + return new Inst_FLAT__FLAT_ATOMIC_MAX_F64(&iFmt->iFmt_FLAT); + } // decode_OP_FLAT__FLAT_ATOMIC_MAX_F64 + GPUStaticInst* Decoder::decode_OP_FLAT__FLAT_ATOMIC_SWAP_X2(MachInst iFmt) { @@ -8588,15 +8621,13 @@ namespace VegaISA GPUStaticInst* Decoder::decode_OP_GLOBAL__GLOBAL_STORE_SHORT_D16_HI(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_STORE_SHORT_D16_HI(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_GLOBAL__GLOBAL_STORE_DWORD(MachInst iFmt) { return new Inst_FLAT__FLAT_STORE_DWORD(&iFmt->iFmt_FLAT); - return nullptr; } GPUStaticInst* @@ -8737,6 +8768,40 @@ namespace VegaISA return new Inst_FLAT__FLAT_ATOMIC_DEC(&iFmt->iFmt_FLAT); } + GPUStaticInst* + Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD_F32(MachInst iFmt) + { + // Note: There is no flat_atomic_add_f32 as of MI200. However, gem5 + // impelements all global and scratch instructions as Inst_FLAT. + return new Inst_FLAT__FLAT_ATOMIC_ADD_F32(&iFmt->iFmt_FLAT); + } + + GPUStaticInst* + Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_PK_ADD_F16(MachInst iFmt) + { + // Note: There is no flat_atomic_pk_add_f16 as of MI200. However, gem5 + // impelements all global and scratch instructions as Inst_FLAT. 
+ return new Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16(&iFmt->iFmt_FLAT); + } + + GPUStaticInst* + Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD_F64(MachInst iFmt) + { + return new Inst_FLAT__FLAT_ATOMIC_ADD_F64(&iFmt->iFmt_FLAT); + } + + GPUStaticInst* + Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_MIN_F64(MachInst iFmt) + { + return new Inst_FLAT__FLAT_ATOMIC_MIN_F64(&iFmt->iFmt_FLAT); + } + + GPUStaticInst* + Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_MAX_F64(MachInst iFmt) + { + return new Inst_FLAT__FLAT_ATOMIC_MAX_F64(&iFmt->iFmt_FLAT); + } + GPUStaticInst* Decoder::decode_OP_GLOBAL__GLOBAL_ATOMIC_SWAP_X2(MachInst iFmt) { @@ -9838,64 +9903,55 @@ namespace VegaISA GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_UBYTE(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_LOAD_UBYTE(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_SBYTE(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_LOAD_SBYTE(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_USHORT(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_LOAD_USHORT(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_SSHORT(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_LOAD_SSHORT(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_DWORD(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_LOAD_DWORD(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_DWORDX2(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_LOAD_DWORDX2(&iFmt->iFmt_FLAT); } GPUStaticInst* 
Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_DWORDX3(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_LOAD_DWORDX3(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_DWORDX4(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_LOAD_DWORDX4(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_STORE_BYTE(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_STORE_BYTE(&iFmt->iFmt_FLAT); } GPUStaticInst* @@ -9908,43 +9964,37 @@ namespace VegaISA GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_STORE_SHORT(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_STORE_SHORT(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_STORE_SHORT_D16_HI(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_STORE_SHORT_D16_HI(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_STORE_DWORD(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_STORE_DWORD(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_STORE_DWORDX2(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_STORE_DWORDX2(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_STORE_DWORDX3(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_STORE_DWORDX3(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_STORE_DWORDX4(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new 
Inst_FLAT__FLAT_STORE_DWORDX4(&iFmt->iFmt_FLAT); } GPUStaticInst* @@ -10154,14 +10204,12 @@ namespace VegaISA GPUStaticInst* Decoder::decode_OP_MUBUF__BUFFER_LOAD_SHORT_D16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_MUBUF__BUFFER_LOAD_SHORT_D16(&iFmt->iFmt_MUBUF); } GPUStaticInst* Decoder::decode_OP_MUBUF__BUFFER_LOAD_SHORT_D16_HI(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI(&iFmt->iFmt_MUBUF); } GPUStaticInst* Decoder::decode_OP_MUBUF__BUFFER_LOAD_FORMAT_D16_HI_X(MachInst iFmt) @@ -11581,6 +11629,12 @@ namespace VegaISA return nullptr; } + GPUStaticInst* + Decoder::decode_OP_VOP1__V_MOV_B64(MachInst iFmt) + { + return new Inst_VOP1__V_MOV_B64(&iFmt->iFmt_VOP1); + } // decode_OP_VOP1__V_MOV_B64 + GPUStaticInst* Decoder::decode_OP_VOP1__V_CVT_F16_U16(MachInst iFmt) { @@ -11729,6 +11783,12 @@ namespace VegaISA return nullptr; } + GPUStaticInst* + Decoder::decode_OP_VOP1__V_ACCVGPR_MOV_B32(MachInst iFmt) + { + return new Inst_VOP1__V_ACCVGPR_MOV_B32(&iFmt->iFmt_VOP1); + } + GPUStaticInst* Decoder::decode_OP_VOPC__V_CMP_CLASS_F32(MachInst iFmt) { @@ -12920,134 +12980,115 @@ namespace VegaISA GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_MAD_I16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_MAD_I16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_MUL_LO_U16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_MUL_LO_U16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_ADD_I16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_ADD_I16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_SUB_I16(MachInst iFmt) { - fatal("Trying to decode 
instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_SUB_I16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_LSHLREV_B16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_LSHLREV_B16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_LSHRREV_B16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_LSHRREV_B16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_ASHRREV_I16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_ASHRREV_B16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_MAX_I16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_MAX_I16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_MIN_I16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_MIN_I16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_MAD_U16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_MAD_U16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_ADD_U16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_ADD_U16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_SUB_U16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_SUB_U16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_MAX_U16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new 
Inst_VOP3P__V_PK_MAX_U16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_MIN_U16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_MIN_U16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_FMA_F16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_FMA_F16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_ADD_F16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_ADD_F16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_MUL_F16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_MUL_F16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_MIN_F16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_MIN_F16(&iFmt->iFmt_VOP3P); } GPUStaticInst* Decoder::decode_OP_VOP3P__V_PK_MAX_F16(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_VOP3P__V_PK_MAX_F16(&iFmt->iFmt_VOP3P); } GPUStaticInst* @@ -13071,6 +13112,406 @@ namespace VegaISA return nullptr; } + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_PK_FMA_F32(MachInst iFmt) + { + return new Inst_VOP3P__V_PK_FMA_F32(&iFmt->iFmt_VOP3P); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_PK_MUL_F32(MachInst iFmt) + { + return new Inst_VOP3P__V_PK_MUL_F32(&iFmt->iFmt_VOP3P); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_PK_ADD_F32(MachInst iFmt) + { + return new Inst_VOP3P__V_PK_ADD_F32(&iFmt->iFmt_VOP3P); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_PK_MOV_B32(MachInst iFmt) + { + return new Inst_VOP3P__V_PK_MOV_B32(&iFmt->iFmt_VOP3P); + } + + GPUStaticInst* + 
Decoder::decode_OP_VOP3P__V_DOT2_F32_F16(MachInst iFmt) + { + return new Inst_VOP3P__V_DOT2_F32_F16(&iFmt->iFmt_VOP3P); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_DOT2_I32_I16(MachInst iFmt) + { + return new Inst_VOP3P__V_DOT2_I32_I16(&iFmt->iFmt_VOP3P); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_DOT2_U32_U16(MachInst iFmt) + { + return new Inst_VOP3P__V_DOT2_U32_U16(&iFmt->iFmt_VOP3P); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_DOT4_I32_I8(MachInst iFmt) + { + return new Inst_VOP3P__V_DOT4_I32_I8(&iFmt->iFmt_VOP3P); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_DOT4_U32_U8(MachInst iFmt) + { + return new Inst_VOP3P__V_DOT4_U32_U8(&iFmt->iFmt_VOP3P); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_DOT8_I32_I4(MachInst iFmt) + { + return new Inst_VOP3P__V_DOT8_I32_I4(&iFmt->iFmt_VOP3P); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_DOT8_U32_U4(MachInst iFmt) + { + return new Inst_VOP3P__V_DOT8_U32_U4(&iFmt->iFmt_VOP3P); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X1_2B_F32(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_32X32X1_2B_F32( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X1_4B_F32(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_16X16X1_4B_F32( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_4X4X1_16B_F32(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_4X4X1_16B_F32( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X2_F32(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_32X32X2_F32( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X4_F32(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_16X16X4_F32( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X4_2B_F16(MachInst iFmt) + { + return new 
Inst_VOP3P_MAI__V_MFMA_F32_32X32X4_2B_F16( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X4_4B_F16(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_16X16X4_4B_F16( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_4X4X4_16B_F16(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_4X4X4_16B_F16( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X8_F16(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_32X32X8_F16( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X16_F16(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_16X16X16_F16( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_I32_32X32X4_2B_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_I32_16X16X4_4B_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_I32_4X4X4_16B_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_I32_16X16X16_I8(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_I32_16X16X16_I8( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_I32_32X32X8_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_I32_32X32X16_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_I32_16X16X32_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + 
Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X4_2B_BF16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X4_4B_BF16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_4X4X4_16B_BF16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X8_BF16(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_32X32X8_BF16( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X16_BF16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X32_F16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X16_F16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X32_BF16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X16_BF16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_I32_16X16X64_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_I32_32X32X32_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F64_4X4X4_4B_F64(MachInst iFmt) + { + return new 
Inst_VOP3P_MAI__V_MFMA_F64_4X4X4_4B_F64( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_BF8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_BF8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_FP8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_FP8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_BF8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_BF8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_FP8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_FP8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_BF8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_BF8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_FP8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + 
} + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_FP8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_BF8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_BF8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_FP8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_FP8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F64_16X16X4_F64(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F64_16X16X4_F64( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_ACCVGPR_READ(MachInst iFmt) + { + return new Inst_VOP3P__V_ACCVGPR_READ(&iFmt->iFmt_VOP3P); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_ACCVGPR_WRITE(MachInst iFmt) + { + return new Inst_VOP3P__V_ACCVGPR_WRITE(&iFmt->iFmt_VOP3P); + } + GPUStaticInst* Decoder::decode_invalid(MachInst iFmt) { diff --git a/src/arch/amdgpu/vega/gpu_decoder.hh b/src/arch/amdgpu/vega/gpu_decoder.hh index af989e0cc7..285377ad3d 100644 --- a/src/arch/amdgpu/vega/gpu_decoder.hh +++ b/src/arch/amdgpu/vega/gpu_decoder.hh @@ -325,6 +325,7 @@ namespace VegaISA GPUStaticInst* decode_OPU_VOP3__V_ADD_U32(MachInst); GPUStaticInst* decode_OPU_VOP3__V_SUB_U32(MachInst); GPUStaticInst* decode_OPU_VOP3__V_SUBREV_U32(MachInst); + GPUStaticInst* decode_OPU_VOP3__V_FMAC_F32(MachInst); GPUStaticInst* decode_OPU_VOP3__V_NOP(MachInst); GPUStaticInst* decode_OPU_VOP3__V_MOV_B32(MachInst); GPUStaticInst* 
decode_OPU_VOP3__V_CVT_I32_F64(MachInst); @@ -470,6 +471,7 @@ namespace VegaISA GPUStaticInst* decode_OPU_VOP3__V_MAD_I16(MachInst); GPUStaticInst* decode_OPU_VOP3__V_FMA_F16(MachInst); GPUStaticInst* decode_OPU_VOP3__V_DIV_FIXUP_F16(MachInst); + GPUStaticInst* decode_OPU_VOP3__V_LSHL_ADD_U64(MachInst); GPUStaticInst* decode_OPU_VOP3__V_INTERP_P1_F32(MachInst); GPUStaticInst* decode_OPU_VOP3__V_INTERP_P2_F32(MachInst); GPUStaticInst* decode_OPU_VOP3__V_INTERP_MOV_F32(MachInst); @@ -508,6 +510,7 @@ namespace VegaISA GPUStaticInst* decode_OPU_VOP3__V_ADD_I16(MachInst); GPUStaticInst* decode_OPU_VOP3__V_SUB_I16(MachInst); GPUStaticInst* decode_OPU_VOP3__V_PACK_B32_F16(MachInst); + GPUStaticInst* decode_OPU_VOP3__V_CVT_PK_FP8_F32(MachInst); GPUStaticInst* decode_OP_DS__DS_ADD_U32(MachInst); GPUStaticInst* decode_OP_DS__DS_SUB_U32(MachInst); GPUStaticInst* decode_OP_DS__DS_RSUB_U32(MachInst); @@ -698,6 +701,9 @@ namespace VegaISA GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_XOR(MachInst); GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_INC(MachInst); GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_DEC(MachInst); + GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_ADD_F64(MachInst); + GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_MIN_F64(MachInst); + GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_MAX_F64(MachInst); GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_SWAP_X2(MachInst); GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_CMPSWAP_X2(MachInst); GPUStaticInst* decode_OP_FLAT__FLAT_ATOMIC_ADD_X2(MachInst); @@ -746,6 +752,11 @@ namespace VegaISA GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_XOR(MachInst); GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_INC(MachInst); GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_DEC(MachInst); + GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD_F32(MachInst); + GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_PK_ADD_F16(MachInst); + GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD_F64(MachInst); + GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_MIN_F64(MachInst); + 
GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_MAX_F64(MachInst); GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_SWAP_X2(MachInst); GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_CMPSWAP_X2(MachInst); GPUStaticInst* decode_OP_GLOBAL__GLOBAL_ATOMIC_ADD_X2(MachInst); @@ -1279,6 +1290,7 @@ namespace VegaISA GPUStaticInst* decode_OP_VOP1__V_FREXP_MANT_F32(MachInst); GPUStaticInst* decode_OP_VOP1__V_CLREXCP(MachInst); GPUStaticInst* decode_OP_VOP1__V_SCREEN_PARTITION_4SE_B32(MachInst); + GPUStaticInst* decode_OP_VOP1__V_MOV_B64(MachInst); GPUStaticInst* decode_OP_VOP1__V_CVT_F16_U16(MachInst); GPUStaticInst* decode_OP_VOP1__V_CVT_F16_I16(MachInst); GPUStaticInst* decode_OP_VOP1__V_CVT_U16_F16(MachInst); @@ -1303,6 +1315,7 @@ namespace VegaISA GPUStaticInst* decode_OP_VOP1__V_CVT_NORM_U16_F16(MachInst); GPUStaticInst* decode_OP_VOP1__V_SAT_PK_U8_I16(MachInst); GPUStaticInst* decode_OP_VOP1__V_SWAP_B32(MachInst); + GPUStaticInst* decode_OP_VOP1__V_ACCVGPR_MOV_B32(MachInst); GPUStaticInst* decode_OP_VOP2__V_CNDMASK_B32(MachInst); GPUStaticInst* decode_OP_VOP2__V_ADD_F32(MachInst); GPUStaticInst* decode_OP_VOP2__V_SUB_F32(MachInst); @@ -1585,6 +1598,65 @@ namespace VegaISA GPUStaticInst* decode_OP_VOP3P__V_MAD_MIX_F32(MachInst); GPUStaticInst* decode_OP_VOP3P__V_MAD_MIXLO_F16(MachInst); GPUStaticInst* decode_OP_VOP3P__V_MAD_MIXHI_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_PK_FMA_F32(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_PK_MUL_F32(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_PK_ADD_F32(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_PK_MOV_B32(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_DOT2_F32_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_DOT2_I32_I16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_DOT2_U32_U16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_DOT4_I32_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_DOT4_U32_U8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_DOT8_I32_I4(MachInst); + GPUStaticInst* 
decode_OP_VOP3P__V_DOT8_U32_U4(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X1_2B_F32(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X1_4B_F32(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_4X4X1_16B_F32(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X2_F32(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X4_F32(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X4_2B_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X4_4B_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_4X4X4_16B_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X8_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X16_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_I32_32X32X4_2B_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_I32_16X16X4_4B_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_I32_4X4X4_16B_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_I32_32X32X8_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_I32_16X16X16_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_I32_32X32X16_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_I32_16X16X32_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X4_2B_BF16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X4_4B_BF16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_4X4X4_16B_BF16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X8_BF16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X16_BF16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_16X16X32_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_32X32X16_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_16X16X32_BF16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_32X32X16_BF16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_I32_16X16X64_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_I32_32X32X32_I8(MachInst); 
+ GPUStaticInst* decode_OP_VOP3P__V_MFMA_F64_16X16X4_F64(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F64_4X4X4_4B_F64(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X32_BF8_BF8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X32_BF8_FP8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X32_FP8_BF8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X32_FP8_FP8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X16_BF8_BF8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X16_BF8_FP8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X16_FP8_BF8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X16_FP8_FP8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_BF8_BF8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_BF8_FP8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_FP8_BF8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_FP8_FP8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_BF8_BF8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_BF8_FP8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_FP8_BF8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_FP8_FP8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_ACCVGPR_READ(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_ACCVGPR_WRITE(MachInst); GPUStaticInst* subDecode_OPU_VOP3(MachInst); GPUStaticInst* subDecode_OP_DS(MachInst); GPUStaticInst* subDecode_OP_FLAT(MachInst); @@ -1642,7 +1714,7 @@ namespace VegaISA struct InFmt_FLAT { unsigned int OFFSET : 13; - unsigned int LDS : 1; + unsigned int SVE : 1; unsigned int SEG : 2; unsigned int GLC : 1; unsigned int SLC : 1; @@ -1908,7 +1980,27 @@ namespace VegaISA unsigned int NEG : 3; }; - union InstFormat { + struct InFmt_VOP3P_MAI + { + unsigned int VDST : 8; + unsigned int CBSZ : 3; + unsigned int ABID : 4; + unsigned int ACC_CD : 1; + 
unsigned int OP : 7; + unsigned int ENCODING : 9; + }; + + struct InFmt_VOP3P_MAI_1 + { + unsigned int SRC0 : 9; + unsigned int SRC1 : 9; + unsigned int SRC2 : 9; + unsigned int ACC : 2; + unsigned int BLGP : 3; + }; + + union InstFormat + { InFmt_DS iFmt_DS; InFmt_DS_1 iFmt_DS_1; InFmt_EXP iFmt_EXP; @@ -1941,6 +2033,8 @@ namespace VegaISA InFmt_VOP_SDWAB iFmt_VOP_SDWAB; InFmt_VOP3P iFmt_VOP3P; InFmt_VOP3P_1 iFmt_VOP3P_1; + InFmt_VOP3P_MAI iFmt_VOP3P_MAI; + InFmt_VOP3P_MAI_1 iFmt_VOP3P_MAI_1; uint32_t imm_u32; float imm_f32; }; // union InstFormat diff --git a/src/arch/amdgpu/vega/isa.cc b/src/arch/amdgpu/vega/gpu_isa.cc similarity index 100% rename from src/arch/amdgpu/vega/isa.cc rename to src/arch/amdgpu/vega/gpu_isa.cc diff --git a/src/arch/amdgpu/vega/registers.cc b/src/arch/amdgpu/vega/gpu_registers.cc similarity index 95% rename from src/arch/amdgpu/vega/registers.cc rename to src/arch/amdgpu/vega/gpu_registers.cc index b7404379cc..302a89e2b8 100644 --- a/src/arch/amdgpu/vega/registers.cc +++ b/src/arch/amdgpu/vega/gpu_registers.cc @@ -89,6 +89,18 @@ namespace VegaISA case REG_ZERO: reg_sym = "0"; break; + case REG_SHARED_BASE: + reg_sym = "src_shared_base"; + break; + case REG_SHARED_LIMIT: + reg_sym = "src_shared_limit"; + break; + case REG_PRIVATE_BASE: + reg_sym = "src_private_base"; + break; + case REG_PRIVATE_LIMIT: + reg_sym = "src_private_limit"; + break; case REG_POS_HALF: reg_sym = "0.5"; break; diff --git a/src/arch/amdgpu/vega/gpu_registers.hh b/src/arch/amdgpu/vega/gpu_registers.hh index 63929d5917..f4d34a571c 100644 --- a/src/arch/amdgpu/vega/gpu_registers.hh +++ b/src/arch/amdgpu/vega/gpu_registers.hh @@ -106,10 +106,10 @@ namespace VegaISA REG_RESERVED_25 = 232, REG_RESERVED_26 = 233, REG_RESERVED_27 = 234, - REG_RESERVED_28 = 235, - REG_RESERVED_29 = 236, - REG_RESERVED_30 = 237, - REG_RESERVED_31 = 238, + REG_SHARED_BASE = 235, + REG_SHARED_LIMIT = 236, + REG_PRIVATE_BASE = 237, + REG_PRIVATE_LIMIT = 238, REG_RESERVED_32 = 239, REG_POS_HALF 
= 240, REG_NEG_HALF = 241, @@ -129,7 +129,7 @@ namespace VegaISA REG_LDS_DIRECT = 254, REG_SRC_LITERAL = 255, REG_VGPR_MIN = 256, - REG_VGPR_MAX = 511 + REG_VGPR_MAX = 767 }; constexpr size_t MaxOperandDwords(16); diff --git a/src/arch/amdgpu/vega/insts/ds.cc b/src/arch/amdgpu/vega/insts/ds.cc new file mode 100644 index 0000000000..c377daa487 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/ds.cc @@ -0,0 +1,4786 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_DS__DS_ADD_U32 class methods --- + + Inst_DS__DS_ADD_U32::Inst_DS__DS_ADD_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_u32") + { + setFlag(MemoryRef); + setFlag(GroupSegment); + setFlag(AtomicAdd); + setFlag(AtomicNoReturn); + } // Inst_DS__DS_ADD_U32 + + Inst_DS__DS_ADD_U32::~Inst_DS__DS_ADD_U32() + { + } // ~Inst_DS__DS_ADD_U32 + + // --- description from .arch file --- + // 32b: + // MEM[ADDR] += DATA; + void + Inst_DS__DS_ADD_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_ADD_U32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 
= instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initAtomicAccess(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_ADD_U32::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_SUB_U32 class methods --- + + Inst_DS__DS_SUB_U32::Inst_DS__DS_SUB_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_sub_u32") + { + } // Inst_DS__DS_SUB_U32 + + Inst_DS__DS_SUB_U32::~Inst_DS__DS_SUB_U32() + { + } // ~Inst_DS__DS_SUB_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= DATA; + // RETURN_DATA = tmp. + void + Inst_DS__DS_SUB_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_RSUB_U32 class methods --- + + Inst_DS__DS_RSUB_U32::Inst_DS__DS_RSUB_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_rsub_u32") + { + } // Inst_DS__DS_RSUB_U32 + + Inst_DS__DS_RSUB_U32::~Inst_DS__DS_RSUB_U32() + { + } // ~Inst_DS__DS_RSUB_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA - MEM[ADDR]; + // RETURN_DATA = tmp. + // Subtraction with reversed operands. + void + Inst_DS__DS_RSUB_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_INC_U32 class methods --- + + Inst_DS__DS_INC_U32::Inst_DS__DS_INC_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_inc_u32") + { + } // Inst_DS__DS_INC_U32 + + Inst_DS__DS_INC_U32::~Inst_DS__DS_INC_U32() + { + } // ~Inst_DS__DS_INC_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); + // RETURN_DATA = tmp. 
+ void + Inst_DS__DS_INC_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_DEC_U32 class methods --- + + Inst_DS__DS_DEC_U32::Inst_DS__DS_DEC_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_dec_u32") + { + } // Inst_DS__DS_DEC_U32 + + Inst_DS__DS_DEC_U32::~Inst_DS__DS_DEC_U32() + { + } // ~Inst_DS__DS_DEC_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 + // (unsigned compare); RETURN_DATA = tmp. + void + Inst_DS__DS_DEC_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_I32 class methods --- + + Inst_DS__DS_MIN_I32::Inst_DS__DS_MIN_I32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_i32") + { + } // Inst_DS__DS_MIN_I32 + + Inst_DS__DS_MIN_I32::~Inst_DS__DS_MIN_I32() + { + } // ~Inst_DS__DS_MIN_I32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. + void + Inst_DS__DS_MIN_I32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_I32 class methods --- + + Inst_DS__DS_MAX_I32::Inst_DS__DS_MAX_I32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_i32") + { + } // Inst_DS__DS_MAX_I32 + + Inst_DS__DS_MAX_I32::~Inst_DS__DS_MAX_I32() + { + } // ~Inst_DS__DS_MAX_I32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. 
+ void + Inst_DS__DS_MAX_I32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_U32 class methods --- + + Inst_DS__DS_MIN_U32::Inst_DS__DS_MIN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_u32") + { + } // Inst_DS__DS_MIN_U32 + + Inst_DS__DS_MIN_U32::~Inst_DS__DS_MIN_U32() + { + } // ~Inst_DS__DS_MIN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. + void + Inst_DS__DS_MIN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_U32 class methods --- + + Inst_DS__DS_MAX_U32::Inst_DS__DS_MAX_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_u32") + { + } // Inst_DS__DS_MAX_U32 + + Inst_DS__DS_MAX_U32::~Inst_DS__DS_MAX_U32() + { + } // ~Inst_DS__DS_MAX_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. + void + Inst_DS__DS_MAX_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_AND_B32 class methods --- + + Inst_DS__DS_AND_B32::Inst_DS__DS_AND_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_and_b32") + { + } // Inst_DS__DS_AND_B32 + + Inst_DS__DS_AND_B32::~Inst_DS__DS_AND_B32() + { + } // ~Inst_DS__DS_AND_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] &= DATA; + // RETURN_DATA = tmp. 
+ void + Inst_DS__DS_AND_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_OR_B32 class methods --- + + Inst_DS__DS_OR_B32::Inst_DS__DS_OR_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_or_b32") + { + setFlag(MemoryRef); + setFlag(GroupSegment); + setFlag(AtomicOr); + setFlag(AtomicNoReturn); + } // Inst_DS__DS_OR_B32 + + Inst_DS__DS_OR_B32::~Inst_DS__DS_OR_B32() + { + } // ~Inst_DS__DS_OR_B32 + + // --- description from .arch file --- + // 32b: + // MEM[ADDR] |= DATA; + void + Inst_DS__DS_OR_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_OR_B32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initAtomicAccess(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_OR_B32::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + + // --- Inst_DS__DS_XOR_B32 class methods --- + + Inst_DS__DS_XOR_B32::Inst_DS__DS_XOR_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_xor_b32") + { + } // Inst_DS__DS_XOR_B32 + + Inst_DS__DS_XOR_B32::~Inst_DS__DS_XOR_B32() + { + } // ~Inst_DS__DS_XOR_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // 
MEM[ADDR] ^= DATA; + // RETURN_DATA = tmp. + void + Inst_DS__DS_XOR_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MSKOR_B32 class methods --- + + Inst_DS__DS_MSKOR_B32::Inst_DS__DS_MSKOR_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_mskor_b32") + { + } // Inst_DS__DS_MSKOR_B32 + + Inst_DS__DS_MSKOR_B32::~Inst_DS__DS_MSKOR_B32() + { + } // ~Inst_DS__DS_MSKOR_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; + // RETURN_DATA = tmp. + // Masked dword OR, D0 contains the mask and D1 contains the new value. + void + Inst_DS__DS_MSKOR_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRITE_B32 class methods --- + + Inst_DS__DS_WRITE_B32::Inst_DS__DS_WRITE_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_b32") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_B32 + + Inst_DS__DS_WRITE_B32::~Inst_DS__DS_WRITE_B32() + { + } // ~Inst_DS__DS_WRITE_B32 + + // --- description from .arch file --- + // 32b: + // MEM[ADDR] = DATA. + // Write dword. 
+ void + Inst_DS__DS_WRITE_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE_B32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemWrite(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_WRITE_B32::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_WRITE2_B32 class methods --- + + Inst_DS__DS_WRITE2_B32::Inst_DS__DS_WRITE2_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write2_b32") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE2_B32 + + Inst_DS__DS_WRITE2_B32::~Inst_DS__DS_WRITE2_B32() + { + } // ~Inst_DS__DS_WRITE2_B32 + + // --- description from .arch file --- + // 32b: + // MEM[ADDR_BASE + OFFSET0 * 4] = DATA; + // MEM[ADDR_BASE + OFFSET1 * 4] = DATA2. + // Write 2 dwords. 
+ void + Inst_DS__DS_WRITE2_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); + ConstVecOperandU32 data1(gpuDynInst, extData.DATA1); + + addr.read(); + data0.read(); + data1.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane * 2] + = data0[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE2_B32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0 * 4; + Addr offset1 = instData.OFFSET1 * 4; + + initDualMemWrite(gpuDynInst, offset0, offset1); + } + + void + Inst_DS__DS_WRITE2_B32::completeAcc(GPUDynInstPtr gpuDynInst) + { + } + // --- Inst_DS__DS_WRITE2ST64_B32 class methods --- + + Inst_DS__DS_WRITE2ST64_B32::Inst_DS__DS_WRITE2ST64_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write2st64_b32") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE2ST64_B32 + + Inst_DS__DS_WRITE2ST64_B32::~Inst_DS__DS_WRITE2ST64_B32() + { + } // ~Inst_DS__DS_WRITE2ST64_B32 + + // --- description from .arch file --- + // 32b: + // MEM[ADDR_BASE + OFFSET0 * 4 * 64] = DATA; + // MEM[ADDR_BASE + OFFSET1 * 4 * 64] = DATA2; + // Write 2 dwords. 
+ void + Inst_DS__DS_WRITE2ST64_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); + ConstVecOperandU32 data1(gpuDynInst, extData.DATA1); + + addr.read(); + data0.read(); + data1.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane * 2] + = data0[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0 * 4 * 64; + Addr offset1 = instData.OFFSET1 * 4 * 64; + + initDualMemWrite(gpuDynInst, offset0, offset1); + } + + void + Inst_DS__DS_WRITE2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst) + { + } + // --- Inst_DS__DS_CMPST_B32 class methods --- + + Inst_DS__DS_CMPST_B32::Inst_DS__DS_CMPST_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_cmpst_b32") + { + } // Inst_DS__DS_CMPST_B32 + + Inst_DS__DS_CMPST_B32::~Inst_DS__DS_CMPST_B32() + { + } // ~Inst_DS__DS_CMPST_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // src = DATA2; + // cmp = DATA; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + // Compare and store. + // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_CMPSWAP opcode. 
+ void + Inst_DS__DS_CMPST_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_CMPST_F32 class methods --- + + Inst_DS__DS_CMPST_F32::Inst_DS__DS_CMPST_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_cmpst_f32") + { + setFlag(F32); + } // Inst_DS__DS_CMPST_F32 + + Inst_DS__DS_CMPST_F32::~Inst_DS__DS_CMPST_F32() + { + } // ~Inst_DS__DS_CMPST_F32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // src = DATA2; + // cmp = DATA; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + // Floating point compare and store that handles NaN/INF/denormal values. + // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_FCMPSWAP opcode. + void + Inst_DS__DS_CMPST_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_F32 class methods --- + + Inst_DS__DS_MIN_F32::Inst_DS__DS_MIN_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_f32") + { + setFlag(F32); + } // Inst_DS__DS_MIN_F32 + + Inst_DS__DS_MIN_F32::~Inst_DS__DS_MIN_F32() + { + } // ~Inst_DS__DS_MIN_F32 + + // --- description from .arch file --- + // 32b. + // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (cmp < tmp) ? src : tmp. + // Floating point minimum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMIN. + void + Inst_DS__DS_MIN_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_F32 class methods --- + + Inst_DS__DS_MAX_F32::Inst_DS__DS_MAX_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_f32") + { + setFlag(F32); + } // Inst_DS__DS_MAX_F32 + + Inst_DS__DS_MAX_F32::~Inst_DS__DS_MAX_F32() + { + } // ~Inst_DS__DS_MAX_F32 + + // --- description from .arch file --- + // 32b. + // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (tmp > cmp) ? src : tmp. 
+ // Floating point maximum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMAX. + void + Inst_DS__DS_MAX_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_NOP class methods --- + + Inst_DS__DS_NOP::Inst_DS__DS_NOP(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_nop") + { + setFlag(Nop); + } // Inst_DS__DS_NOP + + Inst_DS__DS_NOP::~Inst_DS__DS_NOP() + { + } // ~Inst_DS__DS_NOP + + // --- description from .arch file --- + // Do nothing. + void + Inst_DS__DS_NOP::execute(GPUDynInstPtr gpuDynInst) + { + gpuDynInst->wavefront()->decLGKMInstsIssued(); + } // execute + // --- Inst_DS__DS_ADD_F32 class methods --- + + Inst_DS__DS_ADD_F32::Inst_DS__DS_ADD_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_f32") + { + setFlag(F32); + setFlag(MemoryRef); + setFlag(GroupSegment); + setFlag(AtomicAdd); + setFlag(AtomicNoReturn); + } // Inst_DS__DS_ADD_F32 + + Inst_DS__DS_ADD_F32::~Inst_DS__DS_ADD_F32() + { + } // ~Inst_DS__DS_ADD_F32 + + // --- description from .arch file --- + // 32b: + // MEM[ADDR] += DATA; + // Floating point add that handles NaN/INF/denormal values. 
+ void + Inst_DS__DS_ADD_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandF32 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_ADD_F32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initAtomicAccess(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_ADD_F32::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_WRITE_B8 class methods --- + + Inst_DS__DS_WRITE_B8::Inst_DS__DS_WRITE_B8(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_b8") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_B8 + + Inst_DS__DS_WRITE_B8::~Inst_DS__DS_WRITE_B8() + { + } // ~Inst_DS__DS_WRITE_B8 + + // --- description from .arch file --- + // MEM[ADDR] = DATA[7:0]. + // Byte write. 
+ void + Inst_DS__DS_WRITE_B8::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU8 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE_B8::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemWrite(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_WRITE_B8::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_WRITE_B8_D16_HI class methods --- + + Inst_DS__DS_WRITE_B8_D16_HI::Inst_DS__DS_WRITE_B8_D16_HI(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_b8_d16_hi") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_B8_D16_HI + + Inst_DS__DS_WRITE_B8_D16_HI::~Inst_DS__DS_WRITE_B8_D16_HI() + { + } // ~Inst_DS__DS_WRITE_B8_D16_HI + + // --- description from .arch file --- + // MEM[ADDR] = DATA[23:16]. + // Byte write in to high word. 
+ void + Inst_DS__DS_WRITE_B8_D16_HI::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU8 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = bits(data[lane], 23, 16); + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE_B8_D16_HI::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemWrite(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_WRITE_B8_D16_HI::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_WRITE_B16 class methods --- + + Inst_DS__DS_WRITE_B16::Inst_DS__DS_WRITE_B16(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_b16") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_B16 + + Inst_DS__DS_WRITE_B16::~Inst_DS__DS_WRITE_B16() + { + } // ~Inst_DS__DS_WRITE_B16 + + // --- description from .arch file --- + // MEM[ADDR] = DATA[15:0] + // Short write. 
+ void + Inst_DS__DS_WRITE_B16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU16 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE_B16::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemWrite(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_WRITE_B16::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_ADD_RTN_U32 class methods --- + + Inst_DS__DS_ADD_RTN_U32::Inst_DS__DS_ADD_RTN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_rtn_u32") + { + } // Inst_DS__DS_ADD_RTN_U32 + + Inst_DS__DS_ADD_RTN_U32::~Inst_DS__DS_ADD_RTN_U32() + { + } // ~Inst_DS__DS_ADD_RTN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] += DATA; + // RETURN_DATA = tmp. 
+ void + Inst_DS__DS_ADD_RTN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_SUB_RTN_U32 class methods --- + + Inst_DS__DS_SUB_RTN_U32::Inst_DS__DS_SUB_RTN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_sub_rtn_u32") + { + } // Inst_DS__DS_SUB_RTN_U32 + + Inst_DS__DS_SUB_RTN_U32::~Inst_DS__DS_SUB_RTN_U32() + { + } // ~Inst_DS__DS_SUB_RTN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= DATA; + // RETURN_DATA = tmp. + void + Inst_DS__DS_SUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_RSUB_RTN_U32 class methods --- + + Inst_DS__DS_RSUB_RTN_U32::Inst_DS__DS_RSUB_RTN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_rsub_rtn_u32") + { + } // Inst_DS__DS_RSUB_RTN_U32 + + Inst_DS__DS_RSUB_RTN_U32::~Inst_DS__DS_RSUB_RTN_U32() + { + } // ~Inst_DS__DS_RSUB_RTN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA - MEM[ADDR]; + // RETURN_DATA = tmp. + // Subtraction with reversed operands. + void + Inst_DS__DS_RSUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_INC_RTN_U32 class methods --- + + Inst_DS__DS_INC_RTN_U32::Inst_DS__DS_INC_RTN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_inc_rtn_u32") + { + } // Inst_DS__DS_INC_RTN_U32 + + Inst_DS__DS_INC_RTN_U32::~Inst_DS__DS_INC_RTN_U32() + { + } // ~Inst_DS__DS_INC_RTN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); + // RETURN_DATA = tmp. 
+ void + Inst_DS__DS_INC_RTN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_DEC_RTN_U32 class methods --- + + Inst_DS__DS_DEC_RTN_U32::Inst_DS__DS_DEC_RTN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_dec_rtn_u32") + { + } // Inst_DS__DS_DEC_RTN_U32 + + Inst_DS__DS_DEC_RTN_U32::~Inst_DS__DS_DEC_RTN_U32() + { + } // ~Inst_DS__DS_DEC_RTN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 + // (unsigned compare); RETURN_DATA = tmp. + void + Inst_DS__DS_DEC_RTN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_RTN_I32 class methods --- + + Inst_DS__DS_MIN_RTN_I32::Inst_DS__DS_MIN_RTN_I32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_rtn_i32") + { + } // Inst_DS__DS_MIN_RTN_I32 + + Inst_DS__DS_MIN_RTN_I32::~Inst_DS__DS_MIN_RTN_I32() + { + } // ~Inst_DS__DS_MIN_RTN_I32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. + void + Inst_DS__DS_MIN_RTN_I32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_RTN_I32 class methods --- + + Inst_DS__DS_MAX_RTN_I32::Inst_DS__DS_MAX_RTN_I32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_rtn_i32") + { + } // Inst_DS__DS_MAX_RTN_I32 + + Inst_DS__DS_MAX_RTN_I32::~Inst_DS__DS_MAX_RTN_I32() + { + } // ~Inst_DS__DS_MAX_RTN_I32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. 
+ void + Inst_DS__DS_MAX_RTN_I32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_RTN_U32 class methods --- + + Inst_DS__DS_MIN_RTN_U32::Inst_DS__DS_MIN_RTN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_rtn_u32") + { + } // Inst_DS__DS_MIN_RTN_U32 + + Inst_DS__DS_MIN_RTN_U32::~Inst_DS__DS_MIN_RTN_U32() + { + } // ~Inst_DS__DS_MIN_RTN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. + void + Inst_DS__DS_MIN_RTN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_RTN_U32 class methods --- + + Inst_DS__DS_MAX_RTN_U32::Inst_DS__DS_MAX_RTN_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_rtn_u32") + { + } // Inst_DS__DS_MAX_RTN_U32 + + Inst_DS__DS_MAX_RTN_U32::~Inst_DS__DS_MAX_RTN_U32() + { + } // ~Inst_DS__DS_MAX_RTN_U32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. + void + Inst_DS__DS_MAX_RTN_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_AND_RTN_B32 class methods --- + + Inst_DS__DS_AND_RTN_B32::Inst_DS__DS_AND_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_and_rtn_b32") + { + } // Inst_DS__DS_AND_RTN_B32 + + Inst_DS__DS_AND_RTN_B32::~Inst_DS__DS_AND_RTN_B32() + { + } // ~Inst_DS__DS_AND_RTN_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] &= DATA; + // RETURN_DATA = tmp. 
+ void + Inst_DS__DS_AND_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_OR_RTN_B32 class methods --- + + Inst_DS__DS_OR_RTN_B32::Inst_DS__DS_OR_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_or_rtn_b32") + { + } // Inst_DS__DS_OR_RTN_B32 + + Inst_DS__DS_OR_RTN_B32::~Inst_DS__DS_OR_RTN_B32() + { + } // ~Inst_DS__DS_OR_RTN_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] |= DATA; + // RETURN_DATA = tmp. + void + Inst_DS__DS_OR_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_XOR_RTN_B32 class methods --- + + Inst_DS__DS_XOR_RTN_B32::Inst_DS__DS_XOR_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_xor_rtn_b32") + { + } // Inst_DS__DS_XOR_RTN_B32 + + Inst_DS__DS_XOR_RTN_B32::~Inst_DS__DS_XOR_RTN_B32() + { + } // ~Inst_DS__DS_XOR_RTN_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] ^= DATA; + // RETURN_DATA = tmp. + void + Inst_DS__DS_XOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MSKOR_RTN_B32 class methods --- + + Inst_DS__DS_MSKOR_RTN_B32::Inst_DS__DS_MSKOR_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_mskor_rtn_b32") + { + } // Inst_DS__DS_MSKOR_RTN_B32 + + Inst_DS__DS_MSKOR_RTN_B32::~Inst_DS__DS_MSKOR_RTN_B32() + { + } // ~Inst_DS__DS_MSKOR_RTN_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; + // RETURN_DATA = tmp. + // Masked dword OR, D0 contains the mask and D1 contains the new value. 
+ void + Inst_DS__DS_MSKOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRXCHG_RTN_B32 class methods --- + + Inst_DS__DS_WRXCHG_RTN_B32::Inst_DS__DS_WRXCHG_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_wrxchg_rtn_b32") + { + } // Inst_DS__DS_WRXCHG_RTN_B32 + + Inst_DS__DS_WRXCHG_RTN_B32::~Inst_DS__DS_WRXCHG_RTN_B32() + { + } // ~Inst_DS__DS_WRXCHG_RTN_B32 + + // --- description from .arch file --- + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA; + // RETURN_DATA = tmp. + // Write-exchange operation. + void + Inst_DS__DS_WRXCHG_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRXCHG2_RTN_B32 class methods --- + + Inst_DS__DS_WRXCHG2_RTN_B32::Inst_DS__DS_WRXCHG2_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_wrxchg2_rtn_b32") + { + } // Inst_DS__DS_WRXCHG2_RTN_B32 + + Inst_DS__DS_WRXCHG2_RTN_B32::~Inst_DS__DS_WRXCHG2_RTN_B32() + { + } // ~Inst_DS__DS_WRXCHG2_RTN_B32 + + // --- description from .arch file --- + // Write-exchange 2 separate dwords. + void + Inst_DS__DS_WRXCHG2_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRXCHG2ST64_RTN_B32 class methods --- + + Inst_DS__DS_WRXCHG2ST64_RTN_B32::Inst_DS__DS_WRXCHG2ST64_RTN_B32( + InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b32") + { + } // Inst_DS__DS_WRXCHG2ST64_RTN_B32 + + Inst_DS__DS_WRXCHG2ST64_RTN_B32::~Inst_DS__DS_WRXCHG2ST64_RTN_B32() + { + } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B32 + + // --- description from .arch file --- + // Write-exchange 2 separate dwords with a stride of 64 dwords. 
+ void + Inst_DS__DS_WRXCHG2ST64_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_CMPST_RTN_B32 class methods --- + + Inst_DS__DS_CMPST_RTN_B32::Inst_DS__DS_CMPST_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_cmpst_rtn_b32") + { + } // Inst_DS__DS_CMPST_RTN_B32 + + Inst_DS__DS_CMPST_RTN_B32::~Inst_DS__DS_CMPST_RTN_B32() + { + } // ~Inst_DS__DS_CMPST_RTN_B32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // src = DATA2; + // cmp = DATA; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + // Compare and store. + // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_CMPSWAP opcode. + void + Inst_DS__DS_CMPST_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_CMPST_RTN_F32 class methods --- + + Inst_DS__DS_CMPST_RTN_F32::Inst_DS__DS_CMPST_RTN_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_cmpst_rtn_f32") + { + setFlag(F32); + } // Inst_DS__DS_CMPST_RTN_F32 + + Inst_DS__DS_CMPST_RTN_F32::~Inst_DS__DS_CMPST_RTN_F32() + { + } // ~Inst_DS__DS_CMPST_RTN_F32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // src = DATA2; + // cmp = DATA; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + // Floating point compare and store that handles NaN/INF/denormal values. + // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_FCMPSWAP opcode. + void + Inst_DS__DS_CMPST_RTN_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_RTN_F32 class methods --- + + Inst_DS__DS_MIN_RTN_F32::Inst_DS__DS_MIN_RTN_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_rtn_f32") + { + setFlag(F32); + } // Inst_DS__DS_MIN_RTN_F32 + + Inst_DS__DS_MIN_RTN_F32::~Inst_DS__DS_MIN_RTN_F32() + { + } // ~Inst_DS__DS_MIN_RTN_F32 + + // --- description from .arch file --- + // 32b. 
+ // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (cmp < tmp) ? src : tmp. + // Floating point minimum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMIN. + void + Inst_DS__DS_MIN_RTN_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_RTN_F32 class methods --- + + Inst_DS__DS_MAX_RTN_F32::Inst_DS__DS_MAX_RTN_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_rtn_f32") + { + setFlag(F32); + } // Inst_DS__DS_MAX_RTN_F32 + + Inst_DS__DS_MAX_RTN_F32::~Inst_DS__DS_MAX_RTN_F32() + { + } // ~Inst_DS__DS_MAX_RTN_F32 + + // --- description from .arch file --- + // 32b. + // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (tmp > cmp) ? src : tmp. + // Floating point maximum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMAX. + void + Inst_DS__DS_MAX_RTN_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRAP_RTN_B32 class methods --- + + Inst_DS__DS_WRAP_RTN_B32::Inst_DS__DS_WRAP_RTN_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_wrap_rtn_b32") + { + } // Inst_DS__DS_WRAP_RTN_B32 + + Inst_DS__DS_WRAP_RTN_B32::~Inst_DS__DS_WRAP_RTN_B32() + { + } // ~Inst_DS__DS_WRAP_RTN_B32 + + // --- description from .arch file --- + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA) ? tmp - DATA : tmp + DATA2; + // RETURN_DATA = tmp. 
+ void + Inst_DS__DS_WRAP_RTN_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_ADD_RTN_F32 class methods --- + + Inst_DS__DS_ADD_RTN_F32::Inst_DS__DS_ADD_RTN_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_rtn_f32") + { + setFlag(F32); + } // Inst_DS__DS_ADD_RTN_F32 + + Inst_DS__DS_ADD_RTN_F32::~Inst_DS__DS_ADD_RTN_F32() + { + } // ~Inst_DS__DS_ADD_RTN_F32 + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] += DATA; + // RETURN_DATA = tmp. + // Floating point add that handles NaN/INF/denormal values. + void + Inst_DS__DS_ADD_RTN_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_READ_B32 class methods --- + + Inst_DS__DS_READ_B32::Inst_DS__DS_READ_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_b32") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_B32 + + Inst_DS__DS_READ_B32::~Inst_DS__DS_READ_B32() + { + } // ~Inst_DS__DS_READ_B32 + + // --- description from .arch file --- + // RETURN_DATA = MEM[ADDR]. + // Dword read. 
+ void + Inst_DS__DS_READ_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ_B32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_READ_B32::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + + vdst.write(); + } // completeAcc + // --- Inst_DS__DS_READ2_B32 class methods --- + + Inst_DS__DS_READ2_B32::Inst_DS__DS_READ2_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read2_b32") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ2_B32 + + Inst_DS__DS_READ2_B32::~Inst_DS__DS_READ2_B32() + { + } // ~Inst_DS__DS_READ2_B32 + + // --- description from .arch file --- + // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4]; + // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4]. + // Read 2 dwords. 
+ void + Inst_DS__DS_READ2_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ2_B32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0 * 4; + Addr offset1 = instData.OFFSET1 * 4; + + initDualMemRead(gpuDynInst, offset0, offset1); + } // initiateAcc + + void + Inst_DS__DS_READ2_B32::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDST); + VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1]; + } + } + + vdst0.write(); + vdst1.write(); + } // completeAcc + // --- Inst_DS__DS_READ2ST64_B32 class methods --- + + Inst_DS__DS_READ2ST64_B32::Inst_DS__DS_READ2ST64_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read2st64_b32") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ2ST64_B32 + + Inst_DS__DS_READ2ST64_B32::~Inst_DS__DS_READ2ST64_B32() + { + } // ~Inst_DS__DS_READ2ST64_B32 + + // --- description from .arch file --- + // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4 * 64]; + // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4 * 64]. + // Read 2 dwords. 
+ void + Inst_DS__DS_READ2ST64_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = (instData.OFFSET0 * 4 * 64); + Addr offset1 = (instData.OFFSET1 * 4 * 64); + + initDualMemRead(gpuDynInst, offset0, offset1); + } + + void + Inst_DS__DS_READ2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDST); + VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1]; + } + } + + vdst0.write(); + vdst1.write(); + } + // --- Inst_DS__DS_READ_I8 class methods --- + + Inst_DS__DS_READ_I8::Inst_DS__DS_READ_I8(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_i8") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_I8 + + Inst_DS__DS_READ_I8::~Inst_DS__DS_READ_I8() + { + } // ~Inst_DS__DS_READ_I8 + + // --- description from .arch file --- + // RETURN_DATA = signext(MEM[ADDR][7:0]). + // Signed byte read. 
+ void + Inst_DS__DS_READ_I8::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ_I8::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_READ_I8::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (VecElemU32)sext<8>((reinterpret_cast( + gpuDynInst->d_data))[lane]); + } + } + + vdst.write(); + } // completeAcc + // --- Inst_DS__DS_READ_U8 class methods --- + + Inst_DS__DS_READ_U8::Inst_DS__DS_READ_U8(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_u8") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_U8 + + Inst_DS__DS_READ_U8::~Inst_DS__DS_READ_U8() + { + } // ~Inst_DS__DS_READ_U8 + + // --- description from .arch file --- + // RETURN_DATA = {24'h0,MEM[ADDR][7:0]}. + // Unsigned byte read. 
+ void + Inst_DS__DS_READ_U8::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ_U8::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_READ_U8::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (VecElemU32)(reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + + vdst.write(); + } // completeAcc + // --- Inst_DS__DS_READ_I16 class methods --- + + Inst_DS__DS_READ_I16::Inst_DS__DS_READ_I16(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_i16") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_I16 + + Inst_DS__DS_READ_I16::~Inst_DS__DS_READ_I16() + { + } // ~Inst_DS__DS_READ_I16 + + // --- description from .arch file --- + // RETURN_DATA = signext(MEM[ADDR][15:0]). + // Signed short read. 
+ void + Inst_DS__DS_READ_I16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_READ_U16 class methods --- + + Inst_DS__DS_READ_U16::Inst_DS__DS_READ_U16(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_u16") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_U16 + + Inst_DS__DS_READ_U16::~Inst_DS__DS_READ_U16() + { + } // ~Inst_DS__DS_READ_U16 + + // --- description from .arch file --- + // RETURN_DATA = {16'h0,MEM[ADDR][15:0]}. + // Unsigned short read. + void + Inst_DS__DS_READ_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + void + Inst_DS__DS_READ_U16::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_READ_U16::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (VecElemU32)(reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + + vdst.write(); + } // completeAcc + // --- Inst_DS__DS_READ_U16_D16 class methods --- + + Inst_DS__DS_READ_U16_D16:: + Inst_DS__DS_READ_U16_D16(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_u16_d16_hi") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_U16_D16 + + Inst_DS__DS_READ_U16_D16::~Inst_DS__DS_READ_U16_D16() + { + } // 
~Inst_DS__DS_READ_U16_D16 + + // --- description from .arch file --- + // RETURN_DATA[15 : 0].u16 = MEM[ADDR].u16; + // // RETURN_DATA[31:16] is preserved. + void + Inst_DS__DS_READ_U16_D16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + void + Inst_DS__DS_READ_U16_D16::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_READ_U16_D16::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + VecElemU16 ds_val = reinterpret_cast( + gpuDynInst->d_data)[lane]; + replaceBits(vdst[lane], 15, 0, ds_val); + } + } + + vdst.write(); + } // completeAcc + // --- Inst_DS__DS_READ_U16_D16_HI class methods --- + + Inst_DS__DS_READ_U16_D16_HI:: + Inst_DS__DS_READ_U16_D16_HI(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_u16_d16_hi") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_U16_D16_HI + + Inst_DS__DS_READ_U16_D16_HI::~Inst_DS__DS_READ_U16_D16_HI() + { + } // ~Inst_DS__DS_READ_U16_D16_HI + + // --- description from .arch file --- + // RETURN_DATA[31 : 16].u16 = MEM[ADDR].u16; + // // RETURN_DATA[15:0] is preserved. 
+ void + Inst_DS__DS_READ_U16_D16_HI::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + void + Inst_DS__DS_READ_U16_D16_HI::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_READ_U16_D16_HI::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + VecElemU16 ds_val = reinterpret_cast( + gpuDynInst->d_data)[lane]; + replaceBits(vdst[lane], 31, 16, ds_val); + } + } + + vdst.write(); + } // completeAcc + // --- Inst_DS__DS_SWIZZLE_B32 class methods --- + + Inst_DS__DS_SWIZZLE_B32::Inst_DS__DS_SWIZZLE_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_swizzle_b32") + { + /** + * While this operation doesn't actually use DS storage we classify + * it as a load here because it does a writeback to a VGPR, which + * fits in better with the LDS pipeline logic. + */ + setFlag(Load); + setFlag(ALU); + } // Inst_DS__DS_SWIZZLE_B32 + + Inst_DS__DS_SWIZZLE_B32::~Inst_DS__DS_SWIZZLE_B32() + { + } // ~Inst_DS__DS_SWIZZLE_B32 + + // --- description from .arch file --- + // RETURN_DATA = swizzle(vgpr_data, offset1:offset0). + // Dword swizzle, no data is written to LDS memory; See ds_opcodes.docx for + // --- details. 
+ void + Inst_DS__DS_SWIZZLE_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + wf->decLGKMInstsIssued(); + + if (gpuDynInst->exec_mask.none()) { + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit() + ->cyclesToTicks(Cycles(24))); + + ConstVecOperandU32 data(gpuDynInst, extData.DATA0); + VecOperandU32 vdst(gpuDynInst, extData.VDST); + /** + * The "DS pattern" is comprised of both offset fields. That is, the + * swizzle pattern between lanes. Bit 15 of the DS pattern dictates + * which swizzle mode to use. There are two different swizzle + * patterns: 1) QDMode and 2) Bit-masks mode. If bit 15 is set use + * QDMode else use Bit-masks mode. The remaining bits dictate how to + * swizzle the lanes. + * + * QDMode: Chunks the lanes into 4s and swizzles among them. + * Bits 7:6 dictate where lane 3 (of the current chunk) + * gets its date, 5:4 lane 2, etc. + * + * Bit-mask: This mode breaks bits 14:0 into 3 equal-sized chunks. + * 14:10 is the xor_mask, 9:5 is the or_mask, and 4:0 + * is the and_mask. Each lane is swizzled by performing + * the appropriate operation using these masks. + */ + VecElemU16 ds_pattern = ((instData.OFFSET1 << 8) | instData.OFFSET0); + + data.read(); + + if (bits(ds_pattern, 15)) { + // QDMode + for (int lane = 0; lane < NumVecElemPerVecReg; lane += 4) { + /** + * This operation allows data sharing between groups + * of four consecutive threads. Note the increment by + * 4 in the for loop. + */ + if (gpuDynInst->exec_mask[lane]) { + int index0 = lane + bits(ds_pattern, 1, 0); + panic_if(index0 >= NumVecElemPerVecReg, "%s: index0 (%d) " + "is out of bounds.\n", gpuDynInst->disassemble(), + index0); + vdst[lane] + = gpuDynInst->exec_mask[index0] ? 
data[index0]: 0; + } + if (gpuDynInst->exec_mask[lane + 1]) { + int index1 = lane + bits(ds_pattern, 3, 2); + panic_if(index1 >= NumVecElemPerVecReg, "%s: index1 (%d) " + "is out of bounds.\n", gpuDynInst->disassemble(), + index1); + vdst[lane + 1] + = gpuDynInst->exec_mask[index1] ? data[index1]: 0; + } + if (gpuDynInst->exec_mask[lane + 2]) { + int index2 = lane + bits(ds_pattern, 5, 4); + panic_if(index2 >= NumVecElemPerVecReg, "%s: index2 (%d) " + "is out of bounds.\n", gpuDynInst->disassemble(), + index2); + vdst[lane + 2] + = gpuDynInst->exec_mask[index2] ? data[index2]: 0; + } + if (gpuDynInst->exec_mask[lane + 3]) { + int index3 = lane + bits(ds_pattern, 7, 6); + panic_if(index3 >= NumVecElemPerVecReg, "%s: index3 (%d) " + "is out of bounds.\n", gpuDynInst->disassemble(), + index3); + vdst[lane + 3] + = gpuDynInst->exec_mask[index3] ? data[index3]: 0; + } + } + } else { + // Bit Mode + int and_mask = bits(ds_pattern, 4, 0); + int or_mask = bits(ds_pattern, 9, 5); + int xor_mask = bits(ds_pattern, 14, 10); + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + int index = (((lane & and_mask) | or_mask) ^ xor_mask); + // Adjust for the next 32 lanes. + if (lane > 31) { + index += 32; + } + panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is " + "out of bounds.\n", gpuDynInst->disassemble(), + index); + vdst[lane] + = gpuDynInst->exec_mask[index] ? data[index] : 0; + } + } + } + + vdst.write(); + + /** + * This is needed because we treat this instruction as a load + * but it's not an actual memory request. + * Without this, the destination register never gets marked as + * free, leading to a possible deadlock + */ + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + /** + * Similarly, this counter could build up over time, even across + * multiple wavefronts, and cause a deadlock. 
+ */ + wf->rdLmReqsInPipe--; + } // execute + // --- Inst_DS__DS_PERMUTE_B32 class methods --- + + Inst_DS__DS_PERMUTE_B32::Inst_DS__DS_PERMUTE_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_permute_b32") + { + setFlag(MemoryRef); + /** + * While this operation doesn't actually use DS storage we classify + * it as a load here because it does a writeback to a VGPR, which + * fits in better with the LDS pipeline logic. + */ + setFlag(Load); + } // Inst_DS__DS_PERMUTE_B32 + + Inst_DS__DS_PERMUTE_B32::~Inst_DS__DS_PERMUTE_B32() + { + } // ~Inst_DS__DS_PERMUTE_B32 + + // --- description from .arch file --- + // Forward permute. + void + Inst_DS__DS_PERMUTE_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + wf->decLGKMInstsIssued(); + + if (gpuDynInst->exec_mask.none()) { + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit() + ->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data(gpuDynInst, extData.DATA0); + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + addr.read(); + data.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + /** + * One of the offset fields can be used for the index. + * It is assumed OFFSET0 would be used, as OFFSET1 is + * typically only used for DS ops that operate on two + * disparate pieces of data. + */ + assert(!instData.OFFSET1); + /** + * The address provided is a byte address, but VGPRs are + * 4 bytes, so we must divide by 4 to get the actual VGPR + * index. Additionally, the index is calculated modulo the + * WF size, 64 in this case, so we simply extract bits 7-2. 
+ */ + int index = bits(addr[lane] + instData.OFFSET0, 7, 2); + panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out " + "of bounds.\n", gpuDynInst->disassemble(), index); + /** + * If the shuffled index corresponds to a lane that is + * inactive then this instruction writes a 0 to the active + * lane in VDST. + */ + if (wf->execMask(index)) { + vdst[index] = data[lane]; + } else { + vdst[index] = 0; + } + } + } + + vdst.write(); + + /** + * This is needed because we treat this instruction as a load + * but it's not an actual memory request. + * Without this, the destination register never gets marked as + * free, leading to a possible deadlock + */ + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + /** + * Similarly, this counter could build up over time, even across + * multiple wavefronts, and cause a deadlock. + */ + wf->rdLmReqsInPipe--; + } // execute + // --- Inst_DS__DS_BPERMUTE_B32 class methods --- + + Inst_DS__DS_BPERMUTE_B32::Inst_DS__DS_BPERMUTE_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_bpermute_b32") + { + setFlag(MemoryRef); + /** + * While this operation doesn't actually use DS storage we classify + * it as a load here because it does a writeback to a VGPR, which + * fits in better with the LDS pipeline logic. + */ + setFlag(Load); + } // Inst_DS__DS_BPERMUTE_B32 + + Inst_DS__DS_BPERMUTE_B32::~Inst_DS__DS_BPERMUTE_B32() + { + } // ~Inst_DS__DS_BPERMUTE_B32 + + // --- description from .arch file --- + // Backward permute. 
+ void + Inst_DS__DS_BPERMUTE_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + wf->decLGKMInstsIssued(); + + if (gpuDynInst->exec_mask.none()) { + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit() + ->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data(gpuDynInst, extData.DATA0); + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + addr.read(); + data.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + /** + * One of the offset fields can be used for the index. + * It is assumed OFFSET0 would be used, as OFFSET1 is + * typically only used for DS ops that operate on two + * disparate pieces of data. + */ + assert(!instData.OFFSET1); + /** + * The address provided is a byte address, but VGPRs are + * 4 bytes, so we must divide by 4 to get the actual VGPR + * index. Additionally, the index is calculated modulo the + * WF size, 64 in this case, so we simply extract bits 7-2. + */ + int index = bits(addr[lane] + instData.OFFSET0, 7, 2); + panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out " + "of bounds.\n", gpuDynInst->disassemble(), index); + /** + * If the shuffled index corresponds to a lane that is + * inactive then this instruction writes a 0 to the active + * lane in VDST. + */ + if (wf->execMask(index)) { + vdst[lane] = data[index]; + } else { + vdst[lane] = 0; + } + } + } + + vdst.write(); + + /** + * This is needed because we treat this instruction as a load + * but it's not an actual memory request. 
+ * Without this, the destination register never gets marked as + * free, leading to a possible deadlock + */ + wf->computeUnit->vrf[wf->simdId]-> + scheduleWriteOperandsFromLoad(wf, gpuDynInst); + /** + * Similarly, this counter could build up over time, even across + * multiple wavefronts, and cause a deadlock. + */ + wf->rdLmReqsInPipe--; + } // execute + + // --- Inst_DS__DS_ADD_U64 class methods --- + + Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_u64") + { + setFlag(MemoryRef); + setFlag(GroupSegment); + setFlag(AtomicAdd); + setFlag(AtomicNoReturn); + } // Inst_DS__DS_ADD_U64 + + Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64() + { + } // ~Inst_DS__DS_ADD_U64 + + // --- description from .arch file --- + // 64b: + // MEM[ADDR] += DATA[0:1]; + void + Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU64 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_ADD_U64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initAtomicAccess(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_ADD_U64::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_SUB_U64 class methods --- + + 
Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_sub_u64") + { + } // Inst_DS__DS_SUB_U64 + + Inst_DS__DS_SUB_U64::~Inst_DS__DS_SUB_U64() + { + } // ~Inst_DS__DS_SUB_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_SUB_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_RSUB_U64 class methods --- + + Inst_DS__DS_RSUB_U64::Inst_DS__DS_RSUB_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_rsub_u64") + { + } // Inst_DS__DS_RSUB_U64 + + Inst_DS__DS_RSUB_U64::~Inst_DS__DS_RSUB_U64() + { + } // ~Inst_DS__DS_RSUB_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA - MEM[ADDR]; + // RETURN_DATA = tmp. + // Subtraction with reversed operands. + void + Inst_DS__DS_RSUB_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_INC_U64 class methods --- + + Inst_DS__DS_INC_U64::Inst_DS__DS_INC_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_inc_u64") + { + } // Inst_DS__DS_INC_U64 + + Inst_DS__DS_INC_U64::~Inst_DS__DS_INC_U64() + { + } // ~Inst_DS__DS_INC_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_INC_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_DEC_U64 class methods --- + + Inst_DS__DS_DEC_U64::Inst_DS__DS_DEC_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_dec_u64") + { + } // Inst_DS__DS_DEC_U64 + + Inst_DS__DS_DEC_U64::~Inst_DS__DS_DEC_U64() + { + } // ~Inst_DS__DS_DEC_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 + // (unsigned compare); + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_DS__DS_DEC_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_I64 class methods --- + + Inst_DS__DS_MIN_I64::Inst_DS__DS_MIN_I64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_i64") + { + } // Inst_DS__DS_MIN_I64 + + Inst_DS__DS_MIN_I64::~Inst_DS__DS_MIN_I64() + { + } // ~Inst_DS__DS_MIN_I64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_MIN_I64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_I64 class methods --- + + Inst_DS__DS_MAX_I64::Inst_DS__DS_MAX_I64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_i64") + { + } // Inst_DS__DS_MAX_I64 + + Inst_DS__DS_MAX_I64::~Inst_DS__DS_MAX_I64() + { + } // ~Inst_DS__DS_MAX_I64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_MAX_I64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_U64 class methods --- + + Inst_DS__DS_MIN_U64::Inst_DS__DS_MIN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_u64") + { + } // Inst_DS__DS_MIN_U64 + + Inst_DS__DS_MIN_U64::~Inst_DS__DS_MIN_U64() + { + } // ~Inst_DS__DS_MIN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_DS__DS_MIN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_U64 class methods --- + + Inst_DS__DS_MAX_U64::Inst_DS__DS_MAX_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_u64") + { + } // Inst_DS__DS_MAX_U64 + + Inst_DS__DS_MAX_U64::~Inst_DS__DS_MAX_U64() + { + } // ~Inst_DS__DS_MAX_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_MAX_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_AND_B64 class methods --- + + Inst_DS__DS_AND_B64::Inst_DS__DS_AND_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_and_b64") + { + } // Inst_DS__DS_AND_B64 + + Inst_DS__DS_AND_B64::~Inst_DS__DS_AND_B64() + { + } // ~Inst_DS__DS_AND_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] &= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_AND_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_OR_B64 class methods --- + + Inst_DS__DS_OR_B64::Inst_DS__DS_OR_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_or_b64") + { + } // Inst_DS__DS_OR_B64 + + Inst_DS__DS_OR_B64::~Inst_DS__DS_OR_B64() + { + } // ~Inst_DS__DS_OR_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] |= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_OR_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_XOR_B64 class methods --- + + Inst_DS__DS_XOR_B64::Inst_DS__DS_XOR_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_xor_b64") + { + } // Inst_DS__DS_XOR_B64 + + Inst_DS__DS_XOR_B64::~Inst_DS__DS_XOR_B64() + { + } // ~Inst_DS__DS_XOR_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] ^= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_DS__DS_XOR_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MSKOR_B64 class methods --- + + Inst_DS__DS_MSKOR_B64::Inst_DS__DS_MSKOR_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_mskor_b64") + { + } // Inst_DS__DS_MSKOR_B64 + + Inst_DS__DS_MSKOR_B64::~Inst_DS__DS_MSKOR_B64() + { + } // ~Inst_DS__DS_MSKOR_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; + // RETURN_DATA = tmp. + // Masked dword OR, D0 contains the mask and D1 contains the new value. + void + Inst_DS__DS_MSKOR_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRITE_B64 class methods --- + + Inst_DS__DS_WRITE_B64::Inst_DS__DS_WRITE_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_b64") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_B64 + + Inst_DS__DS_WRITE_B64::~Inst_DS__DS_WRITE_B64() + { + } // ~Inst_DS__DS_WRITE_B64 + + // --- description from .arch file --- + // 64b: + // MEM[ADDR] = DATA. + // Write qword. 
+ void + Inst_DS__DS_WRITE_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU64 data(gpuDynInst, extData.DATA0); + + addr.read(); + data.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE_B64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemWrite(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_WRITE_B64::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_WRITE2_B64 class methods --- + + Inst_DS__DS_WRITE2_B64::Inst_DS__DS_WRITE2_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write2_b64") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE2_B64 + + Inst_DS__DS_WRITE2_B64::~Inst_DS__DS_WRITE2_B64() + { + } // ~Inst_DS__DS_WRITE2_B64 + + // --- description from .arch file --- + // 64b: + // MEM[ADDR_BASE + OFFSET0 * 8] = DATA; + // MEM[ADDR_BASE + OFFSET1 * 8] = DATA2. + // Write 2 qwords. 
+ void + Inst_DS__DS_WRITE2_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU64 data0(gpuDynInst, extData.DATA0); + ConstVecOperandU64 data1(gpuDynInst, extData.DATA1); + + addr.read(); + data0.read(); + data1.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast( + gpuDynInst->d_data))[lane * 2] = data0[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE2_B64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0 * 8; + Addr offset1 = instData.OFFSET1 * 8; + + initDualMemWrite(gpuDynInst, offset0, offset1); + } + + void + Inst_DS__DS_WRITE2_B64::completeAcc(GPUDynInstPtr gpuDynInst) + { + } + // --- Inst_DS__DS_WRITE2ST64_B64 class methods --- + + Inst_DS__DS_WRITE2ST64_B64::Inst_DS__DS_WRITE2ST64_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write2st64_b64") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE2ST64_B64 + + Inst_DS__DS_WRITE2ST64_B64::~Inst_DS__DS_WRITE2ST64_B64() + { + } // ~Inst_DS__DS_WRITE2ST64_B64 + + // --- description from .arch file --- + // 64b: + // MEM[ADDR_BASE + OFFSET0 * 8 * 64] = DATA; + // MEM[ADDR_BASE + OFFSET1 * 8 * 64] = DATA2; + // Write 2 qwords. 
+ void + Inst_DS__DS_WRITE2ST64_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU64 data0(gpuDynInst, extData.DATA0); + ConstVecOperandU64 data1(gpuDynInst, extData.DATA1); + + addr.read(); + data0.read(); + data1.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast( + gpuDynInst->d_data))[lane * 2] = data0[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0 * 8 * 64; + Addr offset1 = instData.OFFSET1 * 8 * 64; + + initDualMemWrite(gpuDynInst, offset0, offset1); + } + + void + Inst_DS__DS_WRITE2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst) + { + } + // --- Inst_DS__DS_CMPST_B64 class methods --- + + Inst_DS__DS_CMPST_B64::Inst_DS__DS_CMPST_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_cmpst_b64") + { + } // Inst_DS__DS_CMPST_B64 + + Inst_DS__DS_CMPST_B64::~Inst_DS__DS_CMPST_B64() + { + } // ~Inst_DS__DS_CMPST_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // src = DATA2; + // cmp = DATA; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + // Compare and store. + // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_CMPSWAP_X2 opcode. 
+ void + Inst_DS__DS_CMPST_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_CMPST_F64 class methods --- + + Inst_DS__DS_CMPST_F64::Inst_DS__DS_CMPST_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_cmpst_f64") + { + setFlag(F64); + } // Inst_DS__DS_CMPST_F64 + + Inst_DS__DS_CMPST_F64::~Inst_DS__DS_CMPST_F64() + { + } // ~Inst_DS__DS_CMPST_F64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // src = DATA2; + // cmp = DATA; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + // Floating point compare and store that handles NaN/INF/denormal values. + // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_FCMPSWAP_X2 opcode. + void + Inst_DS__DS_CMPST_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_F64 class methods --- + + Inst_DS__DS_MIN_F64::Inst_DS__DS_MIN_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_f64") + { + setFlag(F64); + } // Inst_DS__DS_MIN_F64 + + Inst_DS__DS_MIN_F64::~Inst_DS__DS_MIN_F64() + { + } // ~Inst_DS__DS_MIN_F64 + + // --- description from .arch file --- + // 64b. + // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (cmp < tmp) ? src : tmp. + // Floating point minimum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMIN_X2. + void + Inst_DS__DS_MIN_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_F64 class methods --- + + Inst_DS__DS_MAX_F64::Inst_DS__DS_MAX_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_f64") + { + setFlag(F64); + } // Inst_DS__DS_MAX_F64 + + Inst_DS__DS_MAX_F64::~Inst_DS__DS_MAX_F64() + { + } // ~Inst_DS__DS_MAX_F64 + + // --- description from .arch file --- + // 64b. + // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (tmp > cmp) ? src : tmp. 
+ // Floating point maximum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMAX_X2. + void + Inst_DS__DS_MAX_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_ADD_RTN_U64 class methods --- + + Inst_DS__DS_ADD_RTN_U64::Inst_DS__DS_ADD_RTN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_rtn_u64") + { + } // Inst_DS__DS_ADD_RTN_U64 + + Inst_DS__DS_ADD_RTN_U64::~Inst_DS__DS_ADD_RTN_U64() + { + } // ~Inst_DS__DS_ADD_RTN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] += DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_ADD_RTN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_SUB_RTN_U64 class methods --- + + Inst_DS__DS_SUB_RTN_U64::Inst_DS__DS_SUB_RTN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_sub_rtn_u64") + { + } // Inst_DS__DS_SUB_RTN_U64 + + Inst_DS__DS_SUB_RTN_U64::~Inst_DS__DS_SUB_RTN_U64() + { + } // ~Inst_DS__DS_SUB_RTN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_SUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_RSUB_RTN_U64 class methods --- + + Inst_DS__DS_RSUB_RTN_U64::Inst_DS__DS_RSUB_RTN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_rsub_rtn_u64") + { + } // Inst_DS__DS_RSUB_RTN_U64 + + Inst_DS__DS_RSUB_RTN_U64::~Inst_DS__DS_RSUB_RTN_U64() + { + } // ~Inst_DS__DS_RSUB_RTN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA - MEM[ADDR]; + // RETURN_DATA = tmp. + // Subtraction with reversed operands. 
+ void + Inst_DS__DS_RSUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_INC_RTN_U64 class methods --- + + Inst_DS__DS_INC_RTN_U64::Inst_DS__DS_INC_RTN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_inc_rtn_u64") + { + } // Inst_DS__DS_INC_RTN_U64 + + Inst_DS__DS_INC_RTN_U64::~Inst_DS__DS_INC_RTN_U64() + { + } // ~Inst_DS__DS_INC_RTN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_INC_RTN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_DEC_RTN_U64 class methods --- + + Inst_DS__DS_DEC_RTN_U64::Inst_DS__DS_DEC_RTN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_dec_rtn_u64") + { + } // Inst_DS__DS_DEC_RTN_U64 + + Inst_DS__DS_DEC_RTN_U64::~Inst_DS__DS_DEC_RTN_U64() + { + } // ~Inst_DS__DS_DEC_RTN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 + // (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_DEC_RTN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_RTN_I64 class methods --- + + Inst_DS__DS_MIN_RTN_I64::Inst_DS__DS_MIN_RTN_I64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_rtn_i64") + { + } // Inst_DS__DS_MIN_RTN_I64 + + Inst_DS__DS_MIN_RTN_I64::~Inst_DS__DS_MIN_RTN_I64() + { + } // ~Inst_DS__DS_MIN_RTN_I64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_DS__DS_MIN_RTN_I64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_RTN_I64 class methods --- + + Inst_DS__DS_MAX_RTN_I64::Inst_DS__DS_MAX_RTN_I64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_rtn_i64") + { + } // Inst_DS__DS_MAX_RTN_I64 + + Inst_DS__DS_MAX_RTN_I64::~Inst_DS__DS_MAX_RTN_I64() + { + } // ~Inst_DS__DS_MAX_RTN_I64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_MAX_RTN_I64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_RTN_U64 class methods --- + + Inst_DS__DS_MIN_RTN_U64::Inst_DS__DS_MIN_RTN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_rtn_u64") + { + } // Inst_DS__DS_MIN_RTN_U64 + + Inst_DS__DS_MIN_RTN_U64::~Inst_DS__DS_MIN_RTN_U64() + { + } // ~Inst_DS__DS_MIN_RTN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_MIN_RTN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_RTN_U64 class methods --- + + Inst_DS__DS_MAX_RTN_U64::Inst_DS__DS_MAX_RTN_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_rtn_u64") + { + } // Inst_DS__DS_MAX_RTN_U64 + + Inst_DS__DS_MAX_RTN_U64::~Inst_DS__DS_MAX_RTN_U64() + { + } // ~Inst_DS__DS_MAX_RTN_U64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_DS__DS_MAX_RTN_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_AND_RTN_B64 class methods --- + + Inst_DS__DS_AND_RTN_B64::Inst_DS__DS_AND_RTN_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_and_rtn_b64") + { + } // Inst_DS__DS_AND_RTN_B64 + + Inst_DS__DS_AND_RTN_B64::~Inst_DS__DS_AND_RTN_B64() + { + } // ~Inst_DS__DS_AND_RTN_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] &= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_AND_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_OR_RTN_B64 class methods --- + + Inst_DS__DS_OR_RTN_B64::Inst_DS__DS_OR_RTN_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_or_rtn_b64") + { + } // Inst_DS__DS_OR_RTN_B64 + + Inst_DS__DS_OR_RTN_B64::~Inst_DS__DS_OR_RTN_B64() + { + } // ~Inst_DS__DS_OR_RTN_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] |= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_DS__DS_OR_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_XOR_RTN_B64 class methods --- + + Inst_DS__DS_XOR_RTN_B64::Inst_DS__DS_XOR_RTN_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_xor_rtn_b64") + { + } // Inst_DS__DS_XOR_RTN_B64 + + Inst_DS__DS_XOR_RTN_B64::~Inst_DS__DS_XOR_RTN_B64() + { + } // ~Inst_DS__DS_XOR_RTN_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] ^= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_DS__DS_XOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MSKOR_RTN_B64 class methods --- + + Inst_DS__DS_MSKOR_RTN_B64::Inst_DS__DS_MSKOR_RTN_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_mskor_rtn_b64") + { + } // Inst_DS__DS_MSKOR_RTN_B64 + + Inst_DS__DS_MSKOR_RTN_B64::~Inst_DS__DS_MSKOR_RTN_B64() + { + } // ~Inst_DS__DS_MSKOR_RTN_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; + // RETURN_DATA = tmp. + // Masked dword OR, D0 contains the mask and D1 contains the new value. + void + Inst_DS__DS_MSKOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRXCHG_RTN_B64 class methods --- + + Inst_DS__DS_WRXCHG_RTN_B64::Inst_DS__DS_WRXCHG_RTN_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_wrxchg_rtn_b64") + { + } // Inst_DS__DS_WRXCHG_RTN_B64 + + Inst_DS__DS_WRXCHG_RTN_B64::~Inst_DS__DS_WRXCHG_RTN_B64() + { + } // ~Inst_DS__DS_WRXCHG_RTN_B64 + + // --- description from .arch file --- + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA; + // RETURN_DATA = tmp. + // Write-exchange operation. + void + Inst_DS__DS_WRXCHG_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRXCHG2_RTN_B64 class methods --- + + Inst_DS__DS_WRXCHG2_RTN_B64::Inst_DS__DS_WRXCHG2_RTN_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_wrxchg2_rtn_b64") + { + } // Inst_DS__DS_WRXCHG2_RTN_B64 + + Inst_DS__DS_WRXCHG2_RTN_B64::~Inst_DS__DS_WRXCHG2_RTN_B64() + { + } // ~Inst_DS__DS_WRXCHG2_RTN_B64 + + // --- description from .arch file --- + // Write-exchange 2 separate qwords. 
+ void + Inst_DS__DS_WRXCHG2_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRXCHG2ST64_RTN_B64 class methods --- + + Inst_DS__DS_WRXCHG2ST64_RTN_B64::Inst_DS__DS_WRXCHG2ST64_RTN_B64( + InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b64") + { + } // Inst_DS__DS_WRXCHG2ST64_RTN_B64 + + Inst_DS__DS_WRXCHG2ST64_RTN_B64::~Inst_DS__DS_WRXCHG2ST64_RTN_B64() + { + } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B64 + + // --- description from .arch file --- + // Write-exchange 2 qwords with a stride of 64 qwords. + void + Inst_DS__DS_WRXCHG2ST64_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_CMPST_RTN_B64 class methods --- + + Inst_DS__DS_CMPST_RTN_B64::Inst_DS__DS_CMPST_RTN_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_cmpst_rtn_b64") + { + } // Inst_DS__DS_CMPST_RTN_B64 + + Inst_DS__DS_CMPST_RTN_B64::~Inst_DS__DS_CMPST_RTN_B64() + { + } // ~Inst_DS__DS_CMPST_RTN_B64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // src = DATA2; + // cmp = DATA; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + // Compare and store. + // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_CMPSWAP_X2 opcode. + void + Inst_DS__DS_CMPST_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_CMPST_RTN_F64 class methods --- + + Inst_DS__DS_CMPST_RTN_F64::Inst_DS__DS_CMPST_RTN_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_cmpst_rtn_f64") + { + setFlag(F64); + } // Inst_DS__DS_CMPST_RTN_F64 + + Inst_DS__DS_CMPST_RTN_F64::~Inst_DS__DS_CMPST_RTN_F64() + { + } // ~Inst_DS__DS_CMPST_RTN_F64 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // src = DATA2; + // cmp = DATA; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + // Floating point compare and store that handles NaN/INF/denormal values. 
+ // Caution, the order of src and cmp are the *opposite* of the + // --- BUFFER_ATOMIC_FCMPSWAP_X2 opcode. + void + Inst_DS__DS_CMPST_RTN_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_RTN_F64 class methods --- + + Inst_DS__DS_MIN_RTN_F64::Inst_DS__DS_MIN_RTN_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_rtn_f64") + { + setFlag(F64); + } // Inst_DS__DS_MIN_RTN_F64 + + Inst_DS__DS_MIN_RTN_F64::~Inst_DS__DS_MIN_RTN_F64() + { + } // ~Inst_DS__DS_MIN_RTN_F64 + + // --- description from .arch file --- + // 64b. + // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (cmp < tmp) ? src : tmp. + // Floating point minimum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMIN_X2. + void + Inst_DS__DS_MIN_RTN_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_RTN_F64 class methods --- + + Inst_DS__DS_MAX_RTN_F64::Inst_DS__DS_MAX_RTN_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_rtn_f64") + { + setFlag(F64); + } // Inst_DS__DS_MAX_RTN_F64 + + Inst_DS__DS_MAX_RTN_F64::~Inst_DS__DS_MAX_RTN_F64() + { + } // ~Inst_DS__DS_MAX_RTN_F64 + + // --- description from .arch file --- + // 64b. + // tmp = MEM[ADDR]; + // src = DATA; + // cmp = DATA2; + // MEM[ADDR] = (tmp > cmp) ? src : tmp. + // Floating point maximum that handles NaN/INF/denormal values. + // Note that this opcode is slightly more general-purpose than + // --- BUFFER_ATOMIC_FMAX_X2. 
+ void + Inst_DS__DS_MAX_RTN_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_READ_B64 class methods --- + + Inst_DS__DS_READ_B64::Inst_DS__DS_READ_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_b64") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_B64 + + Inst_DS__DS_READ_B64::~Inst_DS__DS_READ_B64() + { + } // ~Inst_DS__DS_READ_B64 + + // --- description from .arch file --- + // RETURN_DATA = MEM[ADDR]. + // Read 1 qword. + void + Inst_DS__DS_READ_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ_B64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_READ_B64::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU64 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + + vdst.write(); + } // completeAcc + // --- Inst_DS__DS_READ2_B64 class methods --- + + Inst_DS__DS_READ2_B64::Inst_DS__DS_READ2_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read2_b64") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ2_B64 + + Inst_DS__DS_READ2_B64::~Inst_DS__DS_READ2_B64() + { + } // ~Inst_DS__DS_READ2_B64 + + // --- description from .arch file --- + // 
RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8]; + // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8]. + // Read 2 qwords. + void + Inst_DS__DS_READ2_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ2_B64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0 * 8; + Addr offset1 = instData.OFFSET1 * 8; + + initDualMemRead(gpuDynInst, offset0, offset1); + } // initiateAcc + + void + Inst_DS__DS_READ2_B64::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU64 vdst0(gpuDynInst, extData.VDST); + VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1]; + } + } + + vdst0.write(); + vdst1.write(); + } // completeAcc + // --- Inst_DS__DS_READ2ST64_B64 class methods --- + + Inst_DS__DS_READ2ST64_B64::Inst_DS__DS_READ2ST64_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read2st64_b64") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ2ST64_B64 + + Inst_DS__DS_READ2ST64_B64::~Inst_DS__DS_READ2ST64_B64() + { + } // ~Inst_DS__DS_READ2ST64_B64 + + // --- description from .arch file --- + // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8 * 64]; + // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8 * 64]. + // Read 2 qwords. 
+ void + Inst_DS__DS_READ2ST64_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decLGKMInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = (instData.OFFSET0 * 8 * 64); + Addr offset1 = (instData.OFFSET1 * 8 * 64); + + initDualMemRead(gpuDynInst, offset0, offset1); + } + + void + Inst_DS__DS_READ2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU64 vdst0(gpuDynInst, extData.VDST); + VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1]; + } + } + + vdst0.write(); + vdst1.write(); + } + // --- Inst_DS__DS_CONDXCHG32_RTN_B64 class methods --- + + Inst_DS__DS_CONDXCHG32_RTN_B64::Inst_DS__DS_CONDXCHG32_RTN_B64( + InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_condxchg32_rtn_b64") + { + } // Inst_DS__DS_CONDXCHG32_RTN_B64 + + Inst_DS__DS_CONDXCHG32_RTN_B64::~Inst_DS__DS_CONDXCHG32_RTN_B64() + { + } // ~Inst_DS__DS_CONDXCHG32_RTN_B64 + + // --- description from .arch file --- + // Conditional write exchange. 
+ void + Inst_DS__DS_CONDXCHG32_RTN_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_ADD_SRC2_U32 class methods --- + + Inst_DS__DS_ADD_SRC2_U32::Inst_DS__DS_ADD_SRC2_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_src2_u32") + { + } // Inst_DS__DS_ADD_SRC2_U32 + + Inst_DS__DS_ADD_SRC2_U32::~Inst_DS__DS_ADD_SRC2_U32() + { + } // ~Inst_DS__DS_ADD_SRC2_U32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] + MEM[B]. + void + Inst_DS__DS_ADD_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_SUB_SRC2_U32 class methods --- + + Inst_DS__DS_SUB_SRC2_U32::Inst_DS__DS_SUB_SRC2_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_sub_src2_u32") + { + } // Inst_DS__DS_SUB_SRC2_U32 + + Inst_DS__DS_SUB_SRC2_U32::~Inst_DS__DS_SUB_SRC2_U32() + { + } // ~Inst_DS__DS_SUB_SRC2_U32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] - MEM[B]. + void + Inst_DS__DS_SUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_RSUB_SRC2_U32 class methods --- + + Inst_DS__DS_RSUB_SRC2_U32::Inst_DS__DS_RSUB_SRC2_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_rsub_src2_u32") + { + } // Inst_DS__DS_RSUB_SRC2_U32 + + Inst_DS__DS_RSUB_SRC2_U32::~Inst_DS__DS_RSUB_SRC2_U32() + { + } // ~Inst_DS__DS_RSUB_SRC2_U32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[B] - MEM[A]. 
+ void + Inst_DS__DS_RSUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_INC_SRC2_U32 class methods --- + + Inst_DS__DS_INC_SRC2_U32::Inst_DS__DS_INC_SRC2_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_inc_src2_u32") + { + } // Inst_DS__DS_INC_SRC2_U32 + + Inst_DS__DS_INC_SRC2_U32::~Inst_DS__DS_INC_SRC2_U32() + { + } // ~Inst_DS__DS_INC_SRC2_U32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1). + void + Inst_DS__DS_INC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_DEC_SRC2_U32 class methods --- + + Inst_DS__DS_DEC_SRC2_U32::Inst_DS__DS_DEC_SRC2_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_dec_src2_u32") + { + } // Inst_DS__DS_DEC_SRC2_U32 + + Inst_DS__DS_DEC_SRC2_U32::~Inst_DS__DS_DEC_SRC2_U32() + { + } // ~Inst_DS__DS_DEC_SRC2_U32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1). + // Uint decrement. + void + Inst_DS__DS_DEC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_SRC2_I32 class methods --- + + Inst_DS__DS_MIN_SRC2_I32::Inst_DS__DS_MIN_SRC2_I32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_src2_i32") + { + } // Inst_DS__DS_MIN_SRC2_I32 + + Inst_DS__DS_MIN_SRC2_I32::~Inst_DS__DS_MIN_SRC2_I32() + { + } // ~Inst_DS__DS_MIN_SRC2_I32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = min(MEM[A], MEM[B]). 
+ void + Inst_DS__DS_MIN_SRC2_I32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_SRC2_I32 class methods --- + + Inst_DS__DS_MAX_SRC2_I32::Inst_DS__DS_MAX_SRC2_I32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_src2_i32") + { + } // Inst_DS__DS_MAX_SRC2_I32 + + Inst_DS__DS_MAX_SRC2_I32::~Inst_DS__DS_MAX_SRC2_I32() + { + } // ~Inst_DS__DS_MAX_SRC2_I32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = max(MEM[A], MEM[B]). + void + Inst_DS__DS_MAX_SRC2_I32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_SRC2_U32 class methods --- + + Inst_DS__DS_MIN_SRC2_U32::Inst_DS__DS_MIN_SRC2_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_src2_u32") + { + } // Inst_DS__DS_MIN_SRC2_U32 + + Inst_DS__DS_MIN_SRC2_U32::~Inst_DS__DS_MIN_SRC2_U32() + { + } // ~Inst_DS__DS_MIN_SRC2_U32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = min(MEM[A], MEM[B]). + void + Inst_DS__DS_MIN_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_SRC2_U32 class methods --- + + Inst_DS__DS_MAX_SRC2_U32::Inst_DS__DS_MAX_SRC2_U32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_src2_u32") + { + } // Inst_DS__DS_MAX_SRC2_U32 + + Inst_DS__DS_MAX_SRC2_U32::~Inst_DS__DS_MAX_SRC2_U32() + { + } // ~Inst_DS__DS_MAX_SRC2_U32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = max(MEM[A], MEM[B]). 
+ void + Inst_DS__DS_MAX_SRC2_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_AND_SRC2_B32 class methods --- + + Inst_DS__DS_AND_SRC2_B32::Inst_DS__DS_AND_SRC2_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_and_src2_b32") + { + } // Inst_DS__DS_AND_SRC2_B32 + + Inst_DS__DS_AND_SRC2_B32::~Inst_DS__DS_AND_SRC2_B32() + { + } // ~Inst_DS__DS_AND_SRC2_B32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] & MEM[B]. + void + Inst_DS__DS_AND_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_OR_SRC2_B32 class methods --- + + Inst_DS__DS_OR_SRC2_B32::Inst_DS__DS_OR_SRC2_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_or_src2_b32") + { + } // Inst_DS__DS_OR_SRC2_B32 + + Inst_DS__DS_OR_SRC2_B32::~Inst_DS__DS_OR_SRC2_B32() + { + } // ~Inst_DS__DS_OR_SRC2_B32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] | MEM[B]. + void + Inst_DS__DS_OR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_XOR_SRC2_B32 class methods --- + + Inst_DS__DS_XOR_SRC2_B32::Inst_DS__DS_XOR_SRC2_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_xor_src2_b32") + { + } // Inst_DS__DS_XOR_SRC2_B32 + + Inst_DS__DS_XOR_SRC2_B32::~Inst_DS__DS_XOR_SRC2_B32() + { + } // ~Inst_DS__DS_XOR_SRC2_B32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] ^ MEM[B]. 
+ void + Inst_DS__DS_XOR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRITE_SRC2_B32 class methods --- + + Inst_DS__DS_WRITE_SRC2_B32::Inst_DS__DS_WRITE_SRC2_B32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_src2_b32") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_SRC2_B32 + + Inst_DS__DS_WRITE_SRC2_B32::~Inst_DS__DS_WRITE_SRC2_B32() + { + } // ~Inst_DS__DS_WRITE_SRC2_B32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[B]. + // Write dword. + void + Inst_DS__DS_WRITE_SRC2_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_SRC2_F32 class methods --- + + Inst_DS__DS_MIN_SRC2_F32::Inst_DS__DS_MIN_SRC2_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_src2_f32") + { + setFlag(F32); + } // Inst_DS__DS_MIN_SRC2_F32 + + Inst_DS__DS_MIN_SRC2_F32::~Inst_DS__DS_MIN_SRC2_F32() + { + } // ~Inst_DS__DS_MIN_SRC2_F32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A]. + // Float, handles NaN/INF/denorm. + void + Inst_DS__DS_MIN_SRC2_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_SRC2_F32 class methods --- + + Inst_DS__DS_MAX_SRC2_F32::Inst_DS__DS_MAX_SRC2_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_src2_f32") + { + setFlag(F32); + } // Inst_DS__DS_MAX_SRC2_F32 + + Inst_DS__DS_MAX_SRC2_F32::~Inst_DS__DS_MAX_SRC2_F32() + { + } // ~Inst_DS__DS_MAX_SRC2_F32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A]. 
+ // Float, handles NaN/INF/denorm. + void + Inst_DS__DS_MAX_SRC2_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_ADD_SRC2_F32 class methods --- + + Inst_DS__DS_ADD_SRC2_F32::Inst_DS__DS_ADD_SRC2_F32(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_src2_f32") + { + setFlag(F32); + } // Inst_DS__DS_ADD_SRC2_F32 + + Inst_DS__DS_ADD_SRC2_F32::~Inst_DS__DS_ADD_SRC2_F32() + { + } // ~Inst_DS__DS_ADD_SRC2_F32 + + // --- description from .arch file --- + // 32b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[B] + MEM[A]. + // Float, handles NaN/INF/denorm. + void + Inst_DS__DS_ADD_SRC2_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_GWS_SEMA_RELEASE_ALL class methods --- + + Inst_DS__DS_GWS_SEMA_RELEASE_ALL::Inst_DS__DS_GWS_SEMA_RELEASE_ALL( + InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_gws_sema_release_all") + { + } // Inst_DS__DS_GWS_SEMA_RELEASE_ALL + + Inst_DS__DS_GWS_SEMA_RELEASE_ALL::~Inst_DS__DS_GWS_SEMA_RELEASE_ALL() + { + } // ~Inst_DS__DS_GWS_SEMA_RELEASE_ALL + + // --- description from .arch file --- + // GDS Only: The GWS resource (rid) indicated will process this opcode by + // updating the counter and labeling the specified resource as a semaphore. + // //Determine the GWS resource to work on + // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0]; + // //Incr the state counter of the resource + // state.counter[rid] = state.wave_in_queue; + // state.type = SEMAPHORE; + // return rd_done; //release calling wave + // This action will release ALL queued waves; it Will have no effect if no + // --- waves are present. 
+ void + Inst_DS__DS_GWS_SEMA_RELEASE_ALL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_GWS_INIT class methods --- + + Inst_DS__DS_GWS_INIT::Inst_DS__DS_GWS_INIT(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_gws_init") + { + } // Inst_DS__DS_GWS_INIT + + Inst_DS__DS_GWS_INIT::~Inst_DS__DS_GWS_INIT() + { + } // ~Inst_DS__DS_GWS_INIT + + // --- description from .arch file --- + // GDS Only: Initialize a barrier or semaphore resource. + // //Determine the GWS resource to work on + // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0]; + // //Get the value to use in init + // index = find_first_valid(vector mask) + // value = DATA[thread: index] + // //Set the state of the resource + // state.counter[rid] = lsb(value); //limit #waves + // state.flag[rid] = 0; + // return rd_done; //release calling wave + void + Inst_DS__DS_GWS_INIT::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_GWS_SEMA_V class methods --- + + Inst_DS__DS_GWS_SEMA_V::Inst_DS__DS_GWS_SEMA_V(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_gws_sema_v") + { + } // Inst_DS__DS_GWS_SEMA_V + + Inst_DS__DS_GWS_SEMA_V::~Inst_DS__DS_GWS_SEMA_V() + { + } // ~Inst_DS__DS_GWS_SEMA_V + + // --- description from .arch file --- + // GDS Only: The GWS resource indicated will process this opcode by + // updating the counter and labeling the resource as a semaphore. + // //Determine the GWS resource to work on + // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0]; + // //Incr the state counter of the resource + // state.counter[rid]++; + // state.type = SEMAPHORE; + // return rd_done; //release calling wave + // This action will release one waved if any are queued in this resource. 
+ void + Inst_DS__DS_GWS_SEMA_V::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_GWS_SEMA_BR class methods --- + + Inst_DS__DS_GWS_SEMA_BR::Inst_DS__DS_GWS_SEMA_BR(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_gws_sema_br") + { + } // Inst_DS__DS_GWS_SEMA_BR + + Inst_DS__DS_GWS_SEMA_BR::~Inst_DS__DS_GWS_SEMA_BR() + { + } // ~Inst_DS__DS_GWS_SEMA_BR + + // --- description from .arch file --- + // GDS Only: The GWS resource indicated will process this opcode by + // updating the counter by the bulk release delivered count and labeling + // the resource as a semaphore. + // //Determine the GWS resource to work on + // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0]; + // index = find first valid (vector mask) + // count = DATA[thread: index]; + // //Add count to the resource state counter + // state.counter[rid] += count; + // state.type = SEMAPHORE; + // return rd_done; //release calling wave + // This action will release count number of waves, immediately if queued, + // or as they arrive from the noted resource. + void + Inst_DS__DS_GWS_SEMA_BR::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_GWS_SEMA_P class methods --- + + Inst_DS__DS_GWS_SEMA_P::Inst_DS__DS_GWS_SEMA_P(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_gws_sema_p") + { + } // Inst_DS__DS_GWS_SEMA_P + + Inst_DS__DS_GWS_SEMA_P::~Inst_DS__DS_GWS_SEMA_P() + { + } // ~Inst_DS__DS_GWS_SEMA_P + + // --- description from .arch file --- + // GDS Only: The GWS resource indicated will process this opcode by + // queueing it until counter enables a release and then decrementing the + // counter of the resource as a semaphore. 
+ // //Determine the GWS resource to work on + // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0]; + // state.type = SEMAPHORE; + // ENQUEUE until(state[rid].counter > 0) + // state[rid].counter--; + // return rd_done + void + Inst_DS__DS_GWS_SEMA_P::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_GWS_BARRIER class methods --- + + Inst_DS__DS_GWS_BARRIER::Inst_DS__DS_GWS_BARRIER(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_gws_barrier") + { + } // Inst_DS__DS_GWS_BARRIER + + Inst_DS__DS_GWS_BARRIER::~Inst_DS__DS_GWS_BARRIER() + { + } // ~Inst_DS__DS_GWS_BARRIER + + // --- description from .arch file --- + // GDS Only: The GWS resource indicated will process this opcode by + // queueing it until barrier is satisfied. The number of waves needed is + // passed in as DATA of first valid thread. + // //Determine the GWS resource to work on + // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + OFFSET0[5:0]; + // index = find first valid (vector mask); + // value = DATA[thread: index]; + // // Input Decision Machine + // state.type[rid] = BARRIER; + // if (state[rid].counter <= 0) { + // thread[rid].flag = state[rid].flag; + // ENQUEUE; + // state[rid].flag = !state.flag; + // state[rid].counter = value; + // return rd_done; + // } else { + // state[rid].counter--; + // thread.flag = state[rid].flag; + // ENQUEUE; + // } + // Since the waves deliver the count for the next barrier, this function + // can have a different size barrier for each occurrence. 
+ // // Release Machine + // if (state.type == BARRIER) { + // if (state.flag != thread.flag) { + // return rd_done; + // } + // } + void + Inst_DS__DS_GWS_BARRIER::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_CONSUME class methods --- + + Inst_DS__DS_CONSUME::Inst_DS__DS_CONSUME(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_consume") + { + } // Inst_DS__DS_CONSUME + + Inst_DS__DS_CONSUME::~Inst_DS__DS_CONSUME() + { + } // ~Inst_DS__DS_CONSUME + + // --- description from .arch file --- + // LDS & GDS. Subtract (count_bits(exec_mask)) from the value stored in DS + // memory at (M0.base + instr_offset). Return the pre-operation value to + // VGPRs. + void + Inst_DS__DS_CONSUME::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_APPEND class methods --- + + Inst_DS__DS_APPEND::Inst_DS__DS_APPEND(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_append") + { + } // Inst_DS__DS_APPEND + + Inst_DS__DS_APPEND::~Inst_DS__DS_APPEND() + { + } // ~Inst_DS__DS_APPEND + + // --- description from .arch file --- + // LDS & GDS. Add (count_bits(exec_mask)) to the value stored in DS memory + // at (M0.base + instr_offset). Return the pre-operation value to VGPRs. + void + Inst_DS__DS_APPEND::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_ORDERED_COUNT class methods --- + + Inst_DS__DS_ORDERED_COUNT::Inst_DS__DS_ORDERED_COUNT(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_ordered_count") + { + } // Inst_DS__DS_ORDERED_COUNT + + Inst_DS__DS_ORDERED_COUNT::~Inst_DS__DS_ORDERED_COUNT() + { + } // ~Inst_DS__DS_ORDERED_COUNT + + // --- description from .arch file --- + // GDS-only. Add (count_bits(exec_mask)) to one of 4 dedicated + // ordered-count counters (aka 'packers'). Additional bits of instr.offset + // field are overloaded to hold packer-id, 'last'. 
+ void + Inst_DS__DS_ORDERED_COUNT::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_ADD_SRC2_U64 class methods --- + + Inst_DS__DS_ADD_SRC2_U64::Inst_DS__DS_ADD_SRC2_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_add_src2_u64") + { + } // Inst_DS__DS_ADD_SRC2_U64 + + Inst_DS__DS_ADD_SRC2_U64::~Inst_DS__DS_ADD_SRC2_U64() + { + } // ~Inst_DS__DS_ADD_SRC2_U64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] + MEM[B]. + void + Inst_DS__DS_ADD_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_SUB_SRC2_U64 class methods --- + + Inst_DS__DS_SUB_SRC2_U64::Inst_DS__DS_SUB_SRC2_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_sub_src2_u64") + { + } // Inst_DS__DS_SUB_SRC2_U64 + + Inst_DS__DS_SUB_SRC2_U64::~Inst_DS__DS_SUB_SRC2_U64() + { + } // ~Inst_DS__DS_SUB_SRC2_U64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] - MEM[B]. + void + Inst_DS__DS_SUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_RSUB_SRC2_U64 class methods --- + + Inst_DS__DS_RSUB_SRC2_U64::Inst_DS__DS_RSUB_SRC2_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_rsub_src2_u64") + { + } // Inst_DS__DS_RSUB_SRC2_U64 + + Inst_DS__DS_RSUB_SRC2_U64::~Inst_DS__DS_RSUB_SRC2_U64() + { + } // ~Inst_DS__DS_RSUB_SRC2_U64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[B] - MEM[A]. 
+ void + Inst_DS__DS_RSUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_INC_SRC2_U64 class methods --- + + Inst_DS__DS_INC_SRC2_U64::Inst_DS__DS_INC_SRC2_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_inc_src2_u64") + { + } // Inst_DS__DS_INC_SRC2_U64 + + Inst_DS__DS_INC_SRC2_U64::~Inst_DS__DS_INC_SRC2_U64() + { + } // ~Inst_DS__DS_INC_SRC2_U64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1). + void + Inst_DS__DS_INC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_DEC_SRC2_U64 class methods --- + + Inst_DS__DS_DEC_SRC2_U64::Inst_DS__DS_DEC_SRC2_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_dec_src2_u64") + { + } // Inst_DS__DS_DEC_SRC2_U64 + + Inst_DS__DS_DEC_SRC2_U64::~Inst_DS__DS_DEC_SRC2_U64() + { + } // ~Inst_DS__DS_DEC_SRC2_U64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1). + // Uint decrement. + void + Inst_DS__DS_DEC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_SRC2_I64 class methods --- + + Inst_DS__DS_MIN_SRC2_I64::Inst_DS__DS_MIN_SRC2_I64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_src2_i64") + { + } // Inst_DS__DS_MIN_SRC2_I64 + + Inst_DS__DS_MIN_SRC2_I64::~Inst_DS__DS_MIN_SRC2_I64() + { + } // ~Inst_DS__DS_MIN_SRC2_I64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = min(MEM[A], MEM[B]). 
+ void + Inst_DS__DS_MIN_SRC2_I64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_SRC2_I64 class methods --- + + Inst_DS__DS_MAX_SRC2_I64::Inst_DS__DS_MAX_SRC2_I64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_src2_i64") + { + } // Inst_DS__DS_MAX_SRC2_I64 + + Inst_DS__DS_MAX_SRC2_I64::~Inst_DS__DS_MAX_SRC2_I64() + { + } // ~Inst_DS__DS_MAX_SRC2_I64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = max(MEM[A], MEM[B]). + void + Inst_DS__DS_MAX_SRC2_I64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_SRC2_U64 class methods --- + + Inst_DS__DS_MIN_SRC2_U64::Inst_DS__DS_MIN_SRC2_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_src2_u64") + { + } // Inst_DS__DS_MIN_SRC2_U64 + + Inst_DS__DS_MIN_SRC2_U64::~Inst_DS__DS_MIN_SRC2_U64() + { + } // ~Inst_DS__DS_MIN_SRC2_U64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = min(MEM[A], MEM[B]). + void + Inst_DS__DS_MIN_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_SRC2_U64 class methods --- + + Inst_DS__DS_MAX_SRC2_U64::Inst_DS__DS_MAX_SRC2_U64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_src2_u64") + { + } // Inst_DS__DS_MAX_SRC2_U64 + + Inst_DS__DS_MAX_SRC2_U64::~Inst_DS__DS_MAX_SRC2_U64() + { + } // ~Inst_DS__DS_MAX_SRC2_U64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = max(MEM[A], MEM[B]). 
+ void + Inst_DS__DS_MAX_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_AND_SRC2_B64 class methods --- + + Inst_DS__DS_AND_SRC2_B64::Inst_DS__DS_AND_SRC2_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_and_src2_b64") + { + } // Inst_DS__DS_AND_SRC2_B64 + + Inst_DS__DS_AND_SRC2_B64::~Inst_DS__DS_AND_SRC2_B64() + { + } // ~Inst_DS__DS_AND_SRC2_B64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] & MEM[B]. + void + Inst_DS__DS_AND_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_OR_SRC2_B64 class methods --- + + Inst_DS__DS_OR_SRC2_B64::Inst_DS__DS_OR_SRC2_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_or_src2_b64") + { + } // Inst_DS__DS_OR_SRC2_B64 + + Inst_DS__DS_OR_SRC2_B64::~Inst_DS__DS_OR_SRC2_B64() + { + } // ~Inst_DS__DS_OR_SRC2_B64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] | MEM[B]. + void + Inst_DS__DS_OR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_XOR_SRC2_B64 class methods --- + + Inst_DS__DS_XOR_SRC2_B64::Inst_DS__DS_XOR_SRC2_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_xor_src2_b64") + { + } // Inst_DS__DS_XOR_SRC2_B64 + + Inst_DS__DS_XOR_SRC2_B64::~Inst_DS__DS_XOR_SRC2_B64() + { + } // ~Inst_DS__DS_XOR_SRC2_B64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[A] ^ MEM[B]. 
+ void + Inst_DS__DS_XOR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRITE_SRC2_B64 class methods --- + + Inst_DS__DS_WRITE_SRC2_B64::Inst_DS__DS_WRITE_SRC2_B64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_src2_b64") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_SRC2_B64 + + Inst_DS__DS_WRITE_SRC2_B64::~Inst_DS__DS_WRITE_SRC2_B64() + { + } // ~Inst_DS__DS_WRITE_SRC2_B64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = MEM[B]. + // Write qword. + void + Inst_DS__DS_WRITE_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MIN_SRC2_F64 class methods --- + + Inst_DS__DS_MIN_SRC2_F64::Inst_DS__DS_MIN_SRC2_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_min_src2_f64") + { + setFlag(F64); + } // Inst_DS__DS_MIN_SRC2_F64 + + Inst_DS__DS_MIN_SRC2_F64::~Inst_DS__DS_MIN_SRC2_F64() + { + } // ~Inst_DS__DS_MIN_SRC2_F64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A]. + // Float, handles NaN/INF/denorm. + void + Inst_DS__DS_MIN_SRC2_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_MAX_SRC2_F64 class methods --- + + Inst_DS__DS_MAX_SRC2_F64::Inst_DS__DS_MAX_SRC2_F64(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_max_src2_f64") + { + setFlag(F64); + } // Inst_DS__DS_MAX_SRC2_F64 + + Inst_DS__DS_MAX_SRC2_F64::~Inst_DS__DS_MAX_SRC2_F64() + { + } // ~Inst_DS__DS_MAX_SRC2_F64 + + // --- description from .arch file --- + // 64b: + // A = ADDR_BASE; + // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : + // --- {offset1[6],offset1[6:0],offset0}); + // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A]. 
+ // Float, handles NaN/INF/denorm. + void + Inst_DS__DS_MAX_SRC2_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_DS__DS_WRITE_B96 class methods --- + + Inst_DS__DS_WRITE_B96::Inst_DS__DS_WRITE_B96(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_b96") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_B96 + + Inst_DS__DS_WRITE_B96::~Inst_DS__DS_WRITE_B96() + { + } // ~Inst_DS__DS_WRITE_B96 + + // --- description from .arch file --- + // {MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[95:0]. + // Tri-dword write. + void + Inst_DS__DS_WRITE_B96::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); + ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1); + ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2); + + addr.read(); + data0.read(); + data1.read(); + data2.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4] = data0[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE_B96::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemWrite<3>(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_WRITE_B96::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_WRITE_B128 class methods 
--- + + Inst_DS__DS_WRITE_B128::Inst_DS__DS_WRITE_B128(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_write_b128") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_DS__DS_WRITE_B128 + + Inst_DS__DS_WRITE_B128::~Inst_DS__DS_WRITE_B128() + { + } // ~Inst_DS__DS_WRITE_B128 + + // --- description from .arch file --- + // {MEM[ADDR + 12], MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[127:0]. + // Qword write. + void + Inst_DS__DS_WRITE_B128::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); + ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1); + ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2); + ConstVecOperandU32 data3(gpuDynInst, extData.DATA0 + 3); + + addr.read(); + data0.read(); + data1.read(); + data2.read(); + data3.read(); + + calcAddr(gpuDynInst, addr); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4] = data0[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 3] = data3[lane]; + } + } + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_WRITE_B128::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemWrite<4>(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_WRITE_B128::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_DS__DS_READ_B96 class methods --- + + 
Inst_DS__DS_READ_B96::Inst_DS__DS_READ_B96(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_b96") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_B96 + + Inst_DS__DS_READ_B96::~Inst_DS__DS_READ_B96() + { + } // ~Inst_DS__DS_READ_B96 + + // --- description from .arch file --- + // Tri-dword read. + void + Inst_DS__DS_READ_B96::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ_B96::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead<3>(gpuDynInst, offset); + } + + void + Inst_DS__DS_READ_B96::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDST); + VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); + VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 1]; + vdst2[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 2]; + } + } + + vdst0.write(); + vdst1.write(); + vdst2.write(); + } + // --- Inst_DS__DS_READ_B128 class methods --- + + Inst_DS__DS_READ_B128::Inst_DS__DS_READ_B128(InFmt_DS *iFmt) + : Inst_DS(iFmt, "ds_read_b128") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_DS__DS_READ_B128 + + Inst_DS__DS_READ_B128::~Inst_DS__DS_READ_B128() + { + } // ~Inst_DS__DS_READ_B128 + + // --- description from .arch file --- + // Qword read. 
+ void + Inst_DS__DS_READ_B128::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set( + gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); + ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); + + addr.read(); + + calcAddr(gpuDynInst, addr); + + gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_DS__DS_READ_B128::initiateAcc(GPUDynInstPtr gpuDynInst) + { + Addr offset0 = instData.OFFSET0; + Addr offset1 = instData.OFFSET1; + Addr offset = (offset1 << 8) | offset0; + + initMemRead<4>(gpuDynInst, offset); + } // initiateAcc + + void + Inst_DS__DS_READ_B128::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDST); + VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); + VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); + VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 1]; + vdst2[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 2]; + vdst3[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 3]; + } + } + + vdst0.write(); + vdst1.write(); + vdst2.write(); + vdst3.write(); + } // completeAcc +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/gcn3/insts/gpu_static_inst.cc b/src/arch/amdgpu/vega/insts/exp.cc similarity index 72% rename from src/arch/amdgpu/gcn3/insts/gpu_static_inst.cc rename to src/arch/amdgpu/vega/insts/exp.cc index 8e5310843e..31b6ded10f 100644 --- a/src/arch/amdgpu/gcn3/insts/gpu_static_inst.cc +++ b/src/arch/amdgpu/vega/insts/exp.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. 
+ * Copyright (c) 2024 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,31 +29,30 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "arch/amdgpu/gcn3/insts/gpu_static_inst.hh" - -#include "arch/amdgpu/gcn3/gpu_decoder.hh" -#include "arch/amdgpu/gcn3/insts/instructions.hh" -#include "debug/GPUExec.hh" -#include "gpu-compute/shader.hh" +#include "arch/amdgpu/vega/insts/instructions.hh" namespace gem5 { -namespace Gcn3ISA +namespace VegaISA { - GCN3GPUStaticInst::GCN3GPUStaticInst(const std::string &opcode) - : GPUStaticInst(opcode), _srcLiteral(0) - { - } + // --- Inst_EXP__EXP class methods --- - GCN3GPUStaticInst::~GCN3GPUStaticInst() + Inst_EXP__EXP::Inst_EXP__EXP(InFmt_EXP *iFmt) + : Inst_EXP(iFmt, "exp") { - } + } // Inst_EXP__EXP + Inst_EXP__EXP::~Inst_EXP__EXP() + { + } // ~Inst_EXP__EXP + + // --- description from .arch file --- + // Export through SX. void - GCN3GPUStaticInst::panicUnimplemented() const + Inst_EXP__EXP::execute(GPUDynInstPtr gpuDynInst) { - fatal("Encountered unimplemented GCN3 instruction: %s\n", _opcode); - } -} // namespace Gcn3ISA + panicUnimplemented(); + } // execute +} // namespace VegaISA } // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/flat.cc b/src/arch/amdgpu/vega/insts/flat.cc new file mode 100644 index 0000000000..8dce8d4299 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/flat.cc @@ -0,0 +1,2164 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_FLAT__FLAT_LOAD_UBYTE class methods --- + + Inst_FLAT__FLAT_LOAD_UBYTE::Inst_FLAT__FLAT_LOAD_UBYTE(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_ubyte") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_UBYTE + + Inst_FLAT__FLAT_LOAD_UBYTE::~Inst_FLAT__FLAT_LOAD_UBYTE() + { + } // ~Inst_FLAT__FLAT_LOAD_UBYTE + + // --- description from .arch file --- + // Untyped buffer load unsigned byte (zero extend to VGPR destination). 
+ void + Inst_FLAT__FLAT_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<VecElemU8>(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>( + gpuDynInst->d_data))[lane]); + } + } + vdst.write(); + } // execute + // --- Inst_FLAT__FLAT_LOAD_SBYTE class methods --- + + Inst_FLAT__FLAT_LOAD_SBYTE::Inst_FLAT__FLAT_LOAD_SBYTE(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_sbyte") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_SBYTE + + Inst_FLAT__FLAT_LOAD_SBYTE::~Inst_FLAT__FLAT_LOAD_SBYTE() + { + } // ~Inst_FLAT__FLAT_LOAD_SBYTE + + // --- description from .arch file --- + // Untyped buffer load signed byte (sign extend to VGPR destination).
+ void + Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<VecElemI8>(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (VecElemI32)((reinterpret_cast<VecElemI8*>( + gpuDynInst->d_data))[lane]); + } + } + vdst.write(); + } // execute + // --- Inst_FLAT__FLAT_LOAD_USHORT class methods --- + + Inst_FLAT__FLAT_LOAD_USHORT::Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_ushort") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_USHORT + + Inst_FLAT__FLAT_LOAD_USHORT::~Inst_FLAT__FLAT_LOAD_USHORT() + { + } // ~Inst_FLAT__FLAT_LOAD_USHORT + + // --- description from .arch file --- + // Untyped buffer load unsigned short (zero extend to VGPR destination).
+ void + Inst_FLAT__FLAT_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<VecElemU16>(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>( + gpuDynInst->d_data))[lane]); + } + } + vdst.write(); + } // execute + + // --- Inst_FLAT__FLAT_LOAD_SSHORT class methods --- + + Inst_FLAT__FLAT_LOAD_SSHORT::Inst_FLAT__FLAT_LOAD_SSHORT(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_sshort") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_SSHORT + + Inst_FLAT__FLAT_LOAD_SSHORT::~Inst_FLAT__FLAT_LOAD_SSHORT() + { + } // ~Inst_FLAT__FLAT_LOAD_SSHORT + + // --- description from .arch file --- + // Untyped buffer load signed short (sign extend to VGPR destination).
+ void + Inst_FLAT__FLAT_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_FLAT__FLAT_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_FLAT__FLAT_LOAD_DWORD class methods --- + + Inst_FLAT__FLAT_LOAD_DWORD::Inst_FLAT__FLAT_LOAD_DWORD(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_dword") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_DWORD + + Inst_FLAT__FLAT_LOAD_DWORD::~Inst_FLAT__FLAT_LOAD_DWORD() + { + } // ~Inst_FLAT__FLAT_LOAD_DWORD + + // --- description from .arch file --- + // Untyped buffer load dword. + void + Inst_FLAT__FLAT_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<VecElemU32>(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (reinterpret_cast<VecElemU32*>( + gpuDynInst->d_data))[lane]; + } + } + vdst.write(); + } // completeAcc + // --- Inst_FLAT__FLAT_LOAD_DWORDX2 class methods --- + + Inst_FLAT__FLAT_LOAD_DWORDX2::Inst_FLAT__FLAT_LOAD_DWORDX2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_dwordx2") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_DWORDX2
+ + Inst_FLAT__FLAT_LOAD_DWORDX2::~Inst_FLAT__FLAT_LOAD_DWORDX2() + { + } // ~Inst_FLAT__FLAT_LOAD_DWORDX2 + + // --- description from .arch file --- + // Untyped buffer load 2 dwords. + void + Inst_FLAT__FLAT_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<VecElemU64>(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU64 vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (reinterpret_cast<VecElemU64*>( + gpuDynInst->d_data))[lane]; + } + } + vdst.write(); + } // completeAcc + // --- Inst_FLAT__FLAT_LOAD_DWORDX3 class methods --- + + Inst_FLAT__FLAT_LOAD_DWORDX3::Inst_FLAT__FLAT_LOAD_DWORDX3( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_dwordx3") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_DWORDX3 + + Inst_FLAT__FLAT_LOAD_DWORDX3::~Inst_FLAT__FLAT_LOAD_DWORDX3() + { + } // ~Inst_FLAT__FLAT_LOAD_DWORDX3 + + // --- description from .arch file --- + // Untyped buffer load 3 dwords.
+ void + Inst_FLAT__FLAT_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<3>(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDST); + VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); + VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 3]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 3 + 1]; + vdst2[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 3 + 2]; + } + } + + vdst0.write(); + vdst1.write(); + vdst2.write(); + } // completeAcc + // --- Inst_FLAT__FLAT_LOAD_DWORDX4 class methods --- + + Inst_FLAT__FLAT_LOAD_DWORDX4::Inst_FLAT__FLAT_LOAD_DWORDX4( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_load_dwordx4") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_FLAT__FLAT_LOAD_DWORDX4 + + Inst_FLAT__FLAT_LOAD_DWORDX4::~Inst_FLAT__FLAT_LOAD_DWORDX4() + { + } // ~Inst_FLAT__FLAT_LOAD_DWORDX4 + + // --- description from .arch file --- + // Untyped buffer load 4 dwords. 
+ void + Inst_FLAT__FLAT_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<4>(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDST); + VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); + VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); + VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 1]; + vdst2[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 2]; + vdst3[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 3]; + } + } + + vdst0.write(); + vdst1.write(); + vdst2.write(); + vdst3.write(); + } // completeAcc + // --- Inst_FLAT__FLAT_STORE_BYTE class methods --- + + Inst_FLAT__FLAT_STORE_BYTE::Inst_FLAT__FLAT_STORE_BYTE(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_store_byte") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_FLAT__FLAT_STORE_BYTE + + Inst_FLAT__FLAT_STORE_BYTE::~Inst_FLAT__FLAT_STORE_BYTE() + { + } // ~Inst_FLAT__FLAT_STORE_BYTE + + // --- description from .arch file --- + // Untyped buffer store byte. 
+ void + Inst_FLAT__FLAT_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU8 data(gpuDynInst, extData.DATA); + + data.read(); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite<VecElemU8>(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_FLAT__FLAT_STORE_SHORT class methods --- + + Inst_FLAT__FLAT_STORE_SHORT::Inst_FLAT__FLAT_STORE_SHORT(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_store_short") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_FLAT__FLAT_STORE_SHORT + + Inst_FLAT__FLAT_STORE_SHORT::~Inst_FLAT__FLAT_STORE_SHORT() + { + } // ~Inst_FLAT__FLAT_STORE_SHORT + + // --- description from .arch file --- + // Untyped buffer store short.
+ void + Inst_FLAT__FLAT_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU16 data(gpuDynInst, extData.DATA); + + data.read(); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_FLAT__FLAT_STORE_SHORT_D16_HI class methods --- + + Inst_FLAT__FLAT_STORE_SHORT_D16_HI:: + Inst_FLAT__FLAT_STORE_SHORT_D16_HI(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_store_short_d16_hi") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_FLAT__FLAT_STORE_SHORT_D16_HI + + Inst_FLAT__FLAT_STORE_SHORT_D16_HI::~Inst_FLAT__FLAT_STORE_SHORT_D16_HI() + { + } // ~Inst_FLAT__FLAT_STORE_SHORT_D16_HI + + // --- description from .arch file --- + // Untyped buffer store short. 
+ void + Inst_FLAT__FLAT_STORE_SHORT_D16_HI::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 data(gpuDynInst, extData.DATA); + + data.read(); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = (data[lane] >> 16); + } + } + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_STORE_SHORT_D16_HI::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_STORE_SHORT_D16_HI::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_FLAT__FLAT_STORE_DWORD class methods --- + + Inst_FLAT__FLAT_STORE_DWORD::Inst_FLAT__FLAT_STORE_DWORD(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_store_dword") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_FLAT__FLAT_STORE_DWORD + + Inst_FLAT__FLAT_STORE_DWORD::~Inst_FLAT__FLAT_STORE_DWORD() + { + } // ~Inst_FLAT__FLAT_STORE_DWORD + + // --- description from .arch file --- + // Untyped buffer store dword. 
+ void + Inst_FLAT__FLAT_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 data(gpuDynInst, extData.DATA); + + data.read(); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite<VecElemU32>(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_FLAT__FLAT_STORE_DWORDX2 class methods --- + + Inst_FLAT__FLAT_STORE_DWORDX2::Inst_FLAT__FLAT_STORE_DWORDX2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_store_dwordx2") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_FLAT__FLAT_STORE_DWORDX2 + + Inst_FLAT__FLAT_STORE_DWORDX2::~Inst_FLAT__FLAT_STORE_DWORDX2() + { + } // ~Inst_FLAT__FLAT_STORE_DWORDX2 + + // --- description from .arch file --- + // Untyped buffer store 2 dwords.
+ void + Inst_FLAT__FLAT_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU64 data(gpuDynInst, extData.DATA); + + data.read(); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_FLAT__FLAT_STORE_DWORDX3 class methods --- + + Inst_FLAT__FLAT_STORE_DWORDX3::Inst_FLAT__FLAT_STORE_DWORDX3( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_store_dwordx3") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_FLAT__FLAT_STORE_DWORDX3 + + Inst_FLAT__FLAT_STORE_DWORDX3::~Inst_FLAT__FLAT_STORE_DWORDX3() + { + } // ~Inst_FLAT__FLAT_STORE_DWORDX3 + + // --- description from .arch file --- + // Untyped buffer store 3 dwords. 
+ void + Inst_FLAT__FLAT_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 data0(gpuDynInst, extData.DATA); + ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1); + ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2); + + data0.read(); + data1.read(); + data2.read(); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast( + gpuDynInst->d_data))[lane * 3] = data0[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 3 + 1] = data1[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 3 + 2] = data2[lane]; + } + } + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite<3>(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_FLAT__FLAT_STORE_DWORDX4 class methods --- + + Inst_FLAT__FLAT_STORE_DWORDX4::Inst_FLAT__FLAT_STORE_DWORDX4( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_store_dwordx4") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_FLAT__FLAT_STORE_DWORDX4 + + Inst_FLAT__FLAT_STORE_DWORDX4::~Inst_FLAT__FLAT_STORE_DWORDX4() + { + } // ~Inst_FLAT__FLAT_STORE_DWORDX4 + + // --- description from .arch file --- + // Untyped buffer store 4 dwords. 
+ void + Inst_FLAT__FLAT_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 data0(gpuDynInst, extData.DATA); + ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1); + ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2); + ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3); + + data0.read(); + data1.read(); + data2.read(); + data3.read(); + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4] = data0[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; + (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 3] = data3[lane]; + } + } + + issueRequestHelper(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite<4>(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SWAP class methods --- + + Inst_FLAT__FLAT_ATOMIC_SWAP::Inst_FLAT__FLAT_ATOMIC_SWAP(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_swap") + { + setFlag(AtomicExch); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SWAP + + Inst_FLAT__FLAT_ATOMIC_SWAP::~Inst_FLAT__FLAT_ATOMIC_SWAP() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SWAP + + // --- description from .arch file --- + // 
32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA; + // RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SWAP::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SWAP::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + + // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods --- + + Inst_FLAT__FLAT_ATOMIC_CMPSWAP + ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_cmpswap") + { + setFlag(AtomicCAS); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP + + Inst_FLAT__FLAT_ATOMIC_CMPSWAP::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP() + { + } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // src = DATA[0]; + // cmp = DATA[1]; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_ADD class methods --- + + Inst_FLAT__FLAT_ATOMIC_ADD::Inst_FLAT__FLAT_ATOMIC_ADD(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_add") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_ADD + + Inst_FLAT__FLAT_ATOMIC_ADD::~Inst_FLAT__FLAT_ATOMIC_ADD() + { + } // ~Inst_FLAT__FLAT_ATOMIC_ADD + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] += DATA; + // RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_ADD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_ADD::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SUB class methods --- + + Inst_FLAT__FLAT_ATOMIC_SUB::Inst_FLAT__FLAT_ATOMIC_SUB(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_sub") + { + setFlag(AtomicSub); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SUB + + Inst_FLAT__FLAT_ATOMIC_SUB::~Inst_FLAT__FLAT_ATOMIC_SUB() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SUB + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= DATA; + // RETURN_DATA = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SUB::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SUB::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SMIN class methods --- + + Inst_FLAT__FLAT_ATOMIC_SMIN::Inst_FLAT__FLAT_ATOMIC_SMIN(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_smin") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SMIN + + Inst_FLAT__FLAT_ATOMIC_SMIN::~Inst_FLAT__FLAT_ATOMIC_SMIN() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SMIN + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SMIN::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SMIN::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_UMIN class methods --- + + Inst_FLAT__FLAT_ATOMIC_UMIN::Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_umin") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_UMIN + + Inst_FLAT__FLAT_ATOMIC_UMIN::~Inst_FLAT__FLAT_ATOMIC_UMIN() + { + } // ~Inst_FLAT__FLAT_ATOMIC_UMIN + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_UMIN::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_UMIN::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SMAX class methods --- + + Inst_FLAT__FLAT_ATOMIC_SMAX::Inst_FLAT__FLAT_ATOMIC_SMAX(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_smax") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SMAX + + Inst_FLAT__FLAT_ATOMIC_SMAX::~Inst_FLAT__FLAT_ATOMIC_SMAX() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SMAX + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); + // RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SMAX::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SMAX::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_UMAX class methods --- + + Inst_FLAT__FLAT_ATOMIC_UMAX::Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_umax") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_UMAX + + Inst_FLAT__FLAT_ATOMIC_UMAX::~Inst_FLAT__FLAT_ATOMIC_UMAX() + { + } // ~Inst_FLAT__FLAT_ATOMIC_UMAX + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); + // RETURN_DATA = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_UMAX::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_UMAX::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_AND class methods --- + + Inst_FLAT__FLAT_ATOMIC_AND::Inst_FLAT__FLAT_ATOMIC_AND(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_and") + { + setFlag(AtomicAnd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_AND + + Inst_FLAT__FLAT_ATOMIC_AND::~Inst_FLAT__FLAT_ATOMIC_AND() + { + } // ~Inst_FLAT__FLAT_ATOMIC_AND + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] &= DATA; + // RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_AND::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_AND::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_OR class methods --- + + Inst_FLAT__FLAT_ATOMIC_OR::Inst_FLAT__FLAT_ATOMIC_OR(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_or") + { + setFlag(AtomicOr); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_OR + + Inst_FLAT__FLAT_ATOMIC_OR::~Inst_FLAT__FLAT_ATOMIC_OR() + { + } // ~Inst_FLAT__FLAT_ATOMIC_OR + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] |= DATA; + // RETURN_DATA = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_OR::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_OR::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + + // --- Inst_FLAT__FLAT_ATOMIC_XOR class methods --- + + Inst_FLAT__FLAT_ATOMIC_XOR::Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_xor") + { + setFlag(AtomicXor); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_XOR + + Inst_FLAT__FLAT_ATOMIC_XOR::~Inst_FLAT__FLAT_ATOMIC_XOR() + { + } // ~Inst_FLAT__FLAT_ATOMIC_XOR + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] ^= DATA; + // RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_XOR::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_XOR::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_INC class methods --- + + Inst_FLAT__FLAT_ATOMIC_INC::Inst_FLAT__FLAT_ATOMIC_INC(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_inc") + { + setFlag(AtomicInc); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_INC + + Inst_FLAT__FLAT_ATOMIC_INC::~Inst_FLAT__FLAT_ATOMIC_INC() + { + } // ~Inst_FLAT__FLAT_ATOMIC_INC + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); + // RETURN_DATA = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_INC::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_INC::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_DEC class methods --- + + Inst_FLAT__FLAT_ATOMIC_DEC::Inst_FLAT__FLAT_ATOMIC_DEC(InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_dec") + { + setFlag(AtomicDec); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_DEC + + Inst_FLAT__FLAT_ATOMIC_DEC::~Inst_FLAT__FLAT_ATOMIC_DEC() + { + } // ~Inst_FLAT__FLAT_ATOMIC_DEC + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 + // (unsigned compare); RETURN_DATA = tmp. + void + Inst_FLAT__FLAT_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_DEC::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_DEC::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SWAP_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_SWAP_X2::Inst_FLAT__FLAT_ATOMIC_SWAP_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_swap_x2") + { + setFlag(AtomicExch); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SWAP_X2 + + Inst_FLAT__FLAT_ATOMIC_SWAP_X2::~Inst_FLAT__FLAT_ATOMIC_SWAP_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SWAP_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA[0:1]; + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_cmpswap_x2") + { + setFlag(AtomicCAS); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 + + Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // src = DATA[0:1]; + // cmp = DATA[2:3]; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_ADD_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_ADD_X2::Inst_FLAT__FLAT_ATOMIC_ADD_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_add_x2") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_ADD_X2 + + Inst_FLAT__FLAT_ATOMIC_ADD_X2::~Inst_FLAT__FLAT_ATOMIC_ADD_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_ADD_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] += DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_FLAT__FLAT_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_ADD_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_ADD_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SUB_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_SUB_X2::Inst_FLAT__FLAT_ATOMIC_SUB_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_sub_x2") + { + setFlag(AtomicSub); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SUB_X2 + + Inst_FLAT__FLAT_ATOMIC_SUB_X2::~Inst_FLAT__FLAT_ATOMIC_SUB_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SUB_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SUB_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SUB_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SMIN_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_SMIN_X2::Inst_FLAT__FLAT_ATOMIC_SMIN_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_smin_x2") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SMIN_X2 + + Inst_FLAT__FLAT_ATOMIC_SMIN_X2::~Inst_FLAT__FLAT_ATOMIC_SMIN_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SMIN_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SMIN_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SMIN_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_UMIN_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_UMIN_X2::Inst_FLAT__FLAT_ATOMIC_UMIN_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_umin_x2") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_UMIN_X2 + + Inst_FLAT__FLAT_ATOMIC_UMIN_X2::~Inst_FLAT__FLAT_ATOMIC_UMIN_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_UMIN_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_UMIN_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_UMIN_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_SMAX_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_SMAX_X2::Inst_FLAT__FLAT_ATOMIC_SMAX_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_smax_x2") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_SMAX_X2 + + Inst_FLAT__FLAT_ATOMIC_SMAX_X2::~Inst_FLAT__FLAT_ATOMIC_SMAX_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_SMAX_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_SMAX_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_SMAX_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_UMAX_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_UMAX_X2::Inst_FLAT__FLAT_ATOMIC_UMAX_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_umax_x2") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_UMAX_X2 + + Inst_FLAT__FLAT_ATOMIC_UMAX_X2::~Inst_FLAT__FLAT_ATOMIC_UMAX_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_UMAX_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_UMAX_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_UMAX_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_AND_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_AND_X2::Inst_FLAT__FLAT_ATOMIC_AND_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_and_x2") + { + setFlag(AtomicAnd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_AND_X2 + + Inst_FLAT__FLAT_ATOMIC_AND_X2::~Inst_FLAT__FLAT_ATOMIC_AND_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_AND_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] &= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_FLAT__FLAT_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_AND_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_AND_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_OR_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_OR_X2::Inst_FLAT__FLAT_ATOMIC_OR_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_or_x2") + { + setFlag(AtomicOr); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_OR_X2 + + Inst_FLAT__FLAT_ATOMIC_OR_X2::~Inst_FLAT__FLAT_ATOMIC_OR_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_OR_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] |= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_OR_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_OR_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_XOR_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_XOR_X2::Inst_FLAT__FLAT_ATOMIC_XOR_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_xor_x2") + { + setFlag(AtomicXor); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_XOR_X2 + + Inst_FLAT__FLAT_ATOMIC_XOR_X2::~Inst_FLAT__FLAT_ATOMIC_XOR_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_XOR_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] ^= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_FLAT__FLAT_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_XOR_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_XOR_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_INC_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_INC_X2::Inst_FLAT__FLAT_ATOMIC_INC_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_inc_x2") + { + setFlag(AtomicInc); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_INC_X2 + + Inst_FLAT__FLAT_ATOMIC_INC_X2::~Inst_FLAT__FLAT_ATOMIC_INC_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_INC_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA[0:1]) ? 
0 : tmp + 1 (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_FLAT__FLAT_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_INC_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_INC_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_DEC_X2 class methods --- + + Inst_FLAT__FLAT_ATOMIC_DEC_X2::Inst_FLAT__FLAT_ATOMIC_DEC_X2( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_dec_x2") + { + setFlag(AtomicDec); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_DEC_X2 + + Inst_FLAT__FLAT_ATOMIC_DEC_X2::~Inst_FLAT__FLAT_ATOMIC_DEC_X2() + { + } // ~Inst_FLAT__FLAT_ATOMIC_DEC_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 + // (unsigned compare); + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_FLAT__FLAT_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_DEC_X2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_DEC_X2::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_ADD_F32 class methods --- + + Inst_FLAT__FLAT_ATOMIC_ADD_F32::Inst_FLAT__FLAT_ATOMIC_ADD_F32( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_add_f32") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_ADD_F32 + + Inst_FLAT__FLAT_ATOMIC_ADD_F32::~Inst_FLAT__FLAT_ATOMIC_ADD_F32() + { + } // ~Inst_FLAT__FLAT_ATOMIC_ADD_F32 + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F32::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F32::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F32::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 class methods --- + + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_pk_add_f16") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 + + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::~Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16() + { + } // ~Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 + + void + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + 
void + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_ADD_F64 class methods --- + + Inst_FLAT__FLAT_ATOMIC_ADD_F64::Inst_FLAT__FLAT_ATOMIC_ADD_F64( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_add_f64") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_ADD_F64 + + Inst_FLAT__FLAT_ATOMIC_ADD_F64::~Inst_FLAT__FLAT_ATOMIC_ADD_F64() + { + } // ~Inst_FLAT__FLAT_ATOMIC_ADD_F64 + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F64::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_ADD_F64::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_MIN_F64 class methods --- + + Inst_FLAT__FLAT_ATOMIC_MIN_F64::Inst_FLAT__FLAT_ATOMIC_MIN_F64( + InFmt_FLAT *iFmt) + : Inst_FLAT(iFmt, "flat_atomic_min_f64") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_MIN_F64 + + Inst_FLAT__FLAT_ATOMIC_MIN_F64::~Inst_FLAT__FLAT_ATOMIC_MIN_F64() + { + } // ~Inst_FLAT__FLAT_ATOMIC_MIN_F64 + + void + Inst_FLAT__FLAT_ATOMIC_MIN_F64::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_MIN_F64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_MIN_F64::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc + // --- Inst_FLAT__FLAT_ATOMIC_MAX_F64 class methods --- + + Inst_FLAT__FLAT_ATOMIC_MAX_F64::Inst_FLAT__FLAT_ATOMIC_MAX_F64( + InFmt_FLAT *iFmt) + : 
Inst_FLAT(iFmt, "flat_atomic_max_f64") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + } // Inst_FLAT__FLAT_ATOMIC_MAX_F64 + + Inst_FLAT__FLAT_ATOMIC_MAX_F64::~Inst_FLAT__FLAT_ATOMIC_MAX_F64() + { + } // ~Inst_FLAT__FLAT_ATOMIC_MAX_F64 + + void + Inst_FLAT__FLAT_ATOMIC_MAX_F64::execute(GPUDynInstPtr gpuDynInst) + { + atomicExecute(gpuDynInst); + } // execute + + void + Inst_FLAT__FLAT_ATOMIC_MAX_F64::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_FLAT__FLAT_ATOMIC_MAX_F64::completeAcc(GPUDynInstPtr gpuDynInst) + { + atomicComplete(gpuDynInst); + } // completeAcc +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/inst_util.hh b/src/arch/amdgpu/vega/insts/inst_util.hh index 7ec2e2ddd3..ac8c572d77 100644 --- a/src/arch/amdgpu/vega/insts/inst_util.hh +++ b/src/arch/amdgpu/vega/insts/inst_util.hh @@ -35,6 +35,7 @@ #include #include "arch/amdgpu/vega/gpu_registers.hh" +#include "arch/amdgpu/vega/insts/gpu_static_inst.hh" namespace gem5 { @@ -315,7 +316,8 @@ namespace VegaISA * 0x142: broadcast 15th thread of each row to next row * 0x143: broadcast thread 31 to rows 2 and 3 */ - int dppInstImpl(SqDPPVals dppCtrl, int currLane, int rowNum, + inline int + dppInstImpl(SqDPPVals dppCtrl, int currLane, int rowNum, int rowOffset, bool & outOfBounds) { // local variables @@ -699,7 +701,7 @@ namespace VegaISA if (sel < SDWA_WORD_0) { // we are selecting 1 byte // if we sign extended depends on upper-most bit of byte 0 signExt = (signExt && - (bits(currDstVal, VegaISA::MSB_PER_WORD, 0) & 0x80)); + (bits(currDstVal, VegaISA::MSB_PER_BYTE, 0) & 0x80)); for (int byte = 0; byte < 4; ++byte) { low_bit = byte * VegaISA::BITS_PER_BYTE; @@ -712,7 +714,7 @@ namespace VegaISA 3. 
byte > sel && signExt: we're sign extending and this byte is one of the bytes we need to sign extend */ - origBits_thisByte = bits(origDstVal, high_bit, low_bit); + origBits_thisByte = bits(origDstVal, VegaISA::MSB_PER_BYTE, 0); currBits_thisByte = bits(currDstVal, high_bit, low_bit); newBits = ((byte == sel) ? origBits_thisByte : ((preserve) ? currBits_thisByte : @@ -737,7 +739,7 @@ namespace VegaISA 3. word > (sel & 1) && signExt: we're sign extending and this word is one of the words we need to sign extend */ - origBits_thisWord = bits(origDstVal, high_bit, low_bit); + origBits_thisWord = bits(origDstVal, VegaISA::MSB_PER_WORD, 0); currBits_thisWord = bits(currDstVal, high_bit, low_bit); newBits = ((word == (sel & 0x1)) ? origBits_thisWord : ((preserve) ? currBits_thisWord : diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc deleted file mode 100644 index cd4ad74e6e..0000000000 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ /dev/null @@ -1,45912 +0,0 @@ -/* - * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its - * contributors may be used to endorse or promote products derived from this - * software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "arch/amdgpu/vega/insts/instructions.hh" - -#include - -#include "arch/amdgpu/vega/insts/inst_util.hh" -#include "debug/VEGA.hh" -#include "debug/GPUSync.hh" -#include "dev/amdgpu/hwreg_defines.hh" -#include "gpu-compute/shader.hh" - -namespace gem5 -{ - -namespace VegaISA -{ - // --- Inst_SOP2__S_ADD_U32 class methods --- - - Inst_SOP2__S_ADD_U32::Inst_SOP2__S_ADD_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_add_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_ADD_U32 - - Inst_SOP2__S_ADD_U32::~Inst_SOP2__S_ADD_U32() - { - } // ~Inst_SOP2__S_ADD_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u; - // SCC = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an unsigned - // --- overflow/carry-out for S_ADDC_U32. 
- void - Inst_SOP2__S_ADD_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() + src1.rawData(); - scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData()) - >= 0x100000000ULL ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_SUB_U32 class methods --- - - Inst_SOP2__S_SUB_U32::Inst_SOP2__S_SUB_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_sub_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_SUB_U32 - - Inst_SOP2__S_SUB_U32::~Inst_SOP2__S_SUB_U32() - { - } // ~Inst_SOP2__S_SUB_U32 - - // --- description from .arch file --- - // D.u = S0.u - S1.u; - // SCC = (S1.u > S0.u ? 1 : 0) is an unsigned overflow or carry-out for - // --- S_SUBB_U32. - void - Inst_SOP2__S_SUB_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() - src1.rawData(); - scc = (src1.rawData() > src0.rawData()) ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ADD_I32 class methods --- - - Inst_SOP2__S_ADD_I32::Inst_SOP2__S_ADD_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_add_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_ADD_I32 - - Inst_SOP2__S_ADD_I32::~Inst_SOP2__S_ADD_I32() - { - } // ~Inst_SOP2__S_ADD_I32 - - // --- description from .arch file --- - // D.i = S0.i + S1.i; - // SCC = (S0.u[31] == S1.u[31] && S0.u[31] != D.u[31]) is a signed - // overflow. - // This opcode is not suitable for use with S_ADDC_U32 for implementing - // 64-bit operations. 
- void - Inst_SOP2__S_ADD_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() + src1.rawData(); - scc = (bits(src0.rawData(), 31) == bits(src1.rawData(), 31) - && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) - ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_SUB_I32 class methods --- - - Inst_SOP2__S_SUB_I32::Inst_SOP2__S_SUB_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_sub_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_SUB_I32 - - Inst_SOP2__S_SUB_I32::~Inst_SOP2__S_SUB_I32() - { - } // ~Inst_SOP2__S_SUB_I32 - - // --- description from .arch file --- - // D.i = S0.i - S1.i; - // SCC = (S0.u[31] != S1.u[31] && S0.u[31] != D.u[31]) is a signed - // overflow. - // CAUTION: The condition code behaviour for this opcode is inconsistent - // with V_SUB_I32; see V_SUB_I32 for further details. - // This opcode is not suitable for use with S_SUBB_U32 for implementing - // 64-bit operations. - void - Inst_SOP2__S_SUB_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() - src1.rawData(); - scc = (bits(src0.rawData(), 31) != bits(src1.rawData(), 31) - && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ADDC_U32 class methods --- - - Inst_SOP2__S_ADDC_U32::Inst_SOP2__S_ADDC_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_addc_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_ADDC_U32 - - Inst_SOP2__S_ADDC_U32::~Inst_SOP2__S_ADDC_U32() - { - } // ~Inst_SOP2__S_ADDC_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u + SCC; - // SCC = (S0.u + S1.u + SCC >= 0x800000000ULL ? 1 : 0) is an unsigned - // overflow. - void - Inst_SOP2__S_ADDC_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - scc.read(); - - sdst = src0.rawData() + src1.rawData() + scc.rawData(); - scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData() - + (ScalarRegU64)scc.rawData()) >= 0x100000000ULL ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_SUBB_U32 class methods --- - - Inst_SOP2__S_SUBB_U32::Inst_SOP2__S_SUBB_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_subb_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_SUBB_U32 - - Inst_SOP2__S_SUBB_U32::~Inst_SOP2__S_SUBB_U32() - { - } // ~Inst_SOP2__S_SUBB_U32 - - // --- description from .arch file --- - // D.u = S0.u - S1.u - SCC; - // SCC = (S1.u + SCC > S0.u ? 1 : 0) is an unsigned overflow. - void - Inst_SOP2__S_SUBB_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - scc.read(); - - sdst = src0.rawData() - src1.rawData() - scc.rawData(); - scc = (src1.rawData() + scc.rawData()) > src0.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_MIN_I32 class methods --- - - Inst_SOP2__S_MIN_I32::Inst_SOP2__S_MIN_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_min_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_MIN_I32 - - Inst_SOP2__S_MIN_I32::~Inst_SOP2__S_MIN_I32() - { - } // ~Inst_SOP2__S_MIN_I32 - - // --- description from .arch file --- - // D.i = (S0.i < S1.i) ? S0.i : S1.i; - // SCC = 1 if S0 is chosen as the minimum value. - void - Inst_SOP2__S_MIN_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = std::min(src0.rawData(), src1.rawData()); - scc = (src0.rawData() < src1.rawData()) ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_MIN_U32 class methods --- - - Inst_SOP2__S_MIN_U32::Inst_SOP2__S_MIN_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_min_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_MIN_U32 - - Inst_SOP2__S_MIN_U32::~Inst_SOP2__S_MIN_U32() - { - } // ~Inst_SOP2__S_MIN_U32 - - // --- description from .arch file --- - // D.u = (S0.u < S1.u) ? S0.u : S1.u; - // SCC = 1 if S0 is chosen as the minimum value. - void - Inst_SOP2__S_MIN_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = std::min(src0.rawData(), src1.rawData()); - scc = (src0.rawData() < src1.rawData()) ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_MAX_I32 class methods --- - - Inst_SOP2__S_MAX_I32::Inst_SOP2__S_MAX_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_max_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_MAX_I32 - - Inst_SOP2__S_MAX_I32::~Inst_SOP2__S_MAX_I32() - { - } // ~Inst_SOP2__S_MAX_I32 - - // --- description from .arch file --- - // D.i = (S0.i > S1.i) ? S0.i : S1.i; - // SCC = 1 if S0 is chosen as the maximum value. - void - Inst_SOP2__S_MAX_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = std::max(src0.rawData(), src1.rawData()); - scc = (src0.rawData() > src1.rawData()) ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_MAX_U32 class methods --- - - Inst_SOP2__S_MAX_U32::Inst_SOP2__S_MAX_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_max_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_MAX_U32 - - Inst_SOP2__S_MAX_U32::~Inst_SOP2__S_MAX_U32() - { - } // ~Inst_SOP2__S_MAX_U32 - - // --- description from .arch file --- - // D.u = (S0.u > S1.u) ? S0.u : S1.u; - // SCC = 1 if S0 is chosen as the maximum value. - void - Inst_SOP2__S_MAX_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = std::max(src0.rawData(), src1.rawData()); - scc = (src0.rawData() > src1.rawData()) ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_CSELECT_B32 class methods --- - - Inst_SOP2__S_CSELECT_B32::Inst_SOP2__S_CSELECT_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_cselect_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_CSELECT_B32 - - Inst_SOP2__S_CSELECT_B32::~Inst_SOP2__S_CSELECT_B32() - { - } // ~Inst_SOP2__S_CSELECT_B32 - - // --- description from .arch file --- - // D.u = SCC ? S0.u : S1.u (conditional select). - void - Inst_SOP2__S_CSELECT_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - scc.read(); - - sdst = scc.rawData() ? src0.rawData() : src1.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP2__S_CSELECT_B64 class methods --- - - Inst_SOP2__S_CSELECT_B64::Inst_SOP2__S_CSELECT_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_cselect_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_CSELECT_B64 - - Inst_SOP2__S_CSELECT_B64::~Inst_SOP2__S_CSELECT_B64() - { - } // ~Inst_SOP2__S_CSELECT_B64 - - // --- description from .arch file --- - // D.u64 = SCC ? S0.u64 : S1.u64 (conditional select). - void - Inst_SOP2__S_CSELECT_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - scc.read(); - - sdst = scc.rawData() ? 
src0.rawData() : src1.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP2__S_AND_B32 class methods --- - - Inst_SOP2__S_AND_B32::Inst_SOP2__S_AND_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_and_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_AND_B32 - - Inst_SOP2__S_AND_B32::~Inst_SOP2__S_AND_B32() - { - } // ~Inst_SOP2__S_AND_B32 - - // --- description from .arch file --- - // D.u = S0.u & S1.u; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_AND_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() & src1.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_AND_B64 class methods --- - - Inst_SOP2__S_AND_B64::Inst_SOP2__S_AND_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_and_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_AND_B64 - - Inst_SOP2__S_AND_B64::~Inst_SOP2__S_AND_B64() - { - } // ~Inst_SOP2__S_AND_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64 & S1.u64; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_AND_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() & src1.rawData(); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_OR_B32 class methods --- - - Inst_SOP2__S_OR_B32::Inst_SOP2__S_OR_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_or_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_OR_B32 - - Inst_SOP2__S_OR_B32::~Inst_SOP2__S_OR_B32() - { - } // ~Inst_SOP2__S_OR_B32 - - // --- description from .arch file --- - // D.u = S0.u | S1.u; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_OR_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() | src1.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_OR_B64 class methods --- - - Inst_SOP2__S_OR_B64::Inst_SOP2__S_OR_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_or_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_OR_B64 - - Inst_SOP2__S_OR_B64::~Inst_SOP2__S_OR_B64() - { - } // ~Inst_SOP2__S_OR_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64 | S1.u64; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_OR_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() | src1.rawData(); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_XOR_B32 class methods --- - - Inst_SOP2__S_XOR_B32::Inst_SOP2__S_XOR_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_xor_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_XOR_B32 - - Inst_SOP2__S_XOR_B32::~Inst_SOP2__S_XOR_B32() - { - } // ~Inst_SOP2__S_XOR_B32 - - // --- description from .arch file --- - // D.u = S0.u ^ S1.u; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_XOR_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() ^ src1.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_XOR_B64 class methods --- - - Inst_SOP2__S_XOR_B64::Inst_SOP2__S_XOR_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_xor_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_XOR_B64 - - Inst_SOP2__S_XOR_B64::~Inst_SOP2__S_XOR_B64() - { - } // ~Inst_SOP2__S_XOR_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64 ^ S1.u64; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_XOR_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() ^ src1.rawData(); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ANDN2_B32 class methods --- - - Inst_SOP2__S_ANDN2_B32::Inst_SOP2__S_ANDN2_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_andn2_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_ANDN2_B32 - - Inst_SOP2__S_ANDN2_B32::~Inst_SOP2__S_ANDN2_B32() - { - } // ~Inst_SOP2__S_ANDN2_B32 - - // --- description from .arch file --- - // D.u = S0.u & ~S1.u; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_ANDN2_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() &~ src1.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ANDN2_B64 class methods --- - - Inst_SOP2__S_ANDN2_B64::Inst_SOP2__S_ANDN2_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_andn2_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_ANDN2_B64 - - Inst_SOP2__S_ANDN2_B64::~Inst_SOP2__S_ANDN2_B64() - { - } // ~Inst_SOP2__S_ANDN2_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64 & ~S1.u64; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_ANDN2_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() &~ src1.rawData(); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ORN2_B32 class methods --- - - Inst_SOP2__S_ORN2_B32::Inst_SOP2__S_ORN2_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_orn2_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_ORN2_B32 - - Inst_SOP2__S_ORN2_B32::~Inst_SOP2__S_ORN2_B32() - { - } // ~Inst_SOP2__S_ORN2_B32 - - // --- description from .arch file --- - // D.u = S0.u | ~S1.u; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_ORN2_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() |~ src1.rawData(); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ORN2_B64 class methods --- - - Inst_SOP2__S_ORN2_B64::Inst_SOP2__S_ORN2_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_orn2_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_ORN2_B64 - - Inst_SOP2__S_ORN2_B64::~Inst_SOP2__S_ORN2_B64() - { - } // ~Inst_SOP2__S_ORN2_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64 | ~S1.u64; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_ORN2_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = src0.rawData() |~ src1.rawData(); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_NAND_B32 class methods --- - - Inst_SOP2__S_NAND_B32::Inst_SOP2__S_NAND_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_nand_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_NAND_B32 - - Inst_SOP2__S_NAND_B32::~Inst_SOP2__S_NAND_B32() - { - } // ~Inst_SOP2__S_NAND_B32 - - // --- description from .arch file --- - // D.u = ~(S0.u & S1.u); - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_NAND_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = ~(src0.rawData() & src1.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_NAND_B64 class methods --- - - Inst_SOP2__S_NAND_B64::Inst_SOP2__S_NAND_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_nand_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_NAND_B64 - - Inst_SOP2__S_NAND_B64::~Inst_SOP2__S_NAND_B64() - { - } // ~Inst_SOP2__S_NAND_B64 - - // --- description from .arch file --- - // D.u64 = ~(S0.u64 & S1.u64); - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_NAND_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = ~(src0.rawData() & src1.rawData()); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_NOR_B32 class methods --- - - Inst_SOP2__S_NOR_B32::Inst_SOP2__S_NOR_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_nor_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_NOR_B32 - - Inst_SOP2__S_NOR_B32::~Inst_SOP2__S_NOR_B32() - { - } // ~Inst_SOP2__S_NOR_B32 - - // --- description from .arch file --- - // D.u = ~(S0.u | S1.u); - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_NOR_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = ~(src0.rawData() | src1.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_NOR_B64 class methods --- - - Inst_SOP2__S_NOR_B64::Inst_SOP2__S_NOR_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_nor_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_NOR_B64 - - Inst_SOP2__S_NOR_B64::~Inst_SOP2__S_NOR_B64() - { - } // ~Inst_SOP2__S_NOR_B64 - - // --- description from .arch file --- - // D.u64 = ~(S0.u64 | S1.u64); - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_NOR_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = ~(src0.rawData() | src1.rawData()); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_XNOR_B32 class methods --- - - Inst_SOP2__S_XNOR_B32::Inst_SOP2__S_XNOR_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_xnor_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_XNOR_B32 - - Inst_SOP2__S_XNOR_B32::~Inst_SOP2__S_XNOR_B32() - { - } // ~Inst_SOP2__S_XNOR_B32 - - // --- description from .arch file --- - // D.u = ~(S0.u ^ S1.u); - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_XNOR_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = ~(src0.rawData() ^ src1.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_XNOR_B64 class methods --- - - Inst_SOP2__S_XNOR_B64::Inst_SOP2__S_XNOR_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_xnor_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_XNOR_B64 - - Inst_SOP2__S_XNOR_B64::~Inst_SOP2__S_XNOR_B64() - { - } // ~Inst_SOP2__S_XNOR_B64 - - // --- description from .arch file --- - // D.u64 = ~(S0.u64 ^ S1.u64); - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_XNOR_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = ~(src0.rawData() ^ src1.rawData()); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_LSHL_B32 class methods --- - - Inst_SOP2__S_LSHL_B32::Inst_SOP2__S_LSHL_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_lshl_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_LSHL_B32 - - Inst_SOP2__S_LSHL_B32::~Inst_SOP2__S_LSHL_B32() - { - } // ~Inst_SOP2__S_LSHL_B32 - - // --- description from .arch file --- - // D.u = S0.u << S1.u[4:0]; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_LSHL_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() << bits(src1.rawData(), 4, 0)); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_LSHL_B64 class methods --- - - Inst_SOP2__S_LSHL_B64::Inst_SOP2__S_LSHL_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_lshl_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_LSHL_B64 - - Inst_SOP2__S_LSHL_B64::~Inst_SOP2__S_LSHL_B64() - { - } // ~Inst_SOP2__S_LSHL_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64 << S1.u[5:0]; - // SCC = 1 if result is non-zero. - void - Inst_SOP2__S_LSHL_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() << bits(src1.rawData(), 5, 0)); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_LSHR_B32 class methods --- - - Inst_SOP2__S_LSHR_B32::Inst_SOP2__S_LSHR_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_lshr_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_LSHR_B32 - - Inst_SOP2__S_LSHR_B32::~Inst_SOP2__S_LSHR_B32() - { - } // ~Inst_SOP2__S_LSHR_B32 - - // --- description from .arch file --- - // D.u = S0.u >> S1.u[4:0]; - // SCC = 1 if result is non-zero. - // The vacated bits are set to zero. - void - Inst_SOP2__S_LSHR_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_LSHR_B64 class methods --- - - Inst_SOP2__S_LSHR_B64::Inst_SOP2__S_LSHR_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_lshr_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_LSHR_B64 - - Inst_SOP2__S_LSHR_B64::~Inst_SOP2__S_LSHR_B64() - { - } // ~Inst_SOP2__S_LSHR_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64 >> S1.u[5:0]; - // SCC = 1 if result is non-zero. - // The vacated bits are set to zero. - void - Inst_SOP2__S_LSHR_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ASHR_I32 class methods --- - - Inst_SOP2__S_ASHR_I32::Inst_SOP2__S_ASHR_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_ashr_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_ASHR_I32 - - Inst_SOP2__S_ASHR_I32::~Inst_SOP2__S_ASHR_I32() - { - } // ~Inst_SOP2__S_ASHR_I32 - - // --- description from .arch file --- - // D.i = signext(S0.i) >> S1.u[4:0]; - // SCC = 1 if result is non-zero. - // The vacated bits are set to the sign bit of the input value. - void - Inst_SOP2__S_ASHR_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_ASHR_I64 class methods --- - - Inst_SOP2__S_ASHR_I64::Inst_SOP2__S_ASHR_I64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_ashr_i64") - { - setFlag(ALU); - } // Inst_SOP2__S_ASHR_I64 - - Inst_SOP2__S_ASHR_I64::~Inst_SOP2__S_ASHR_I64() - { - } // ~Inst_SOP2__S_ASHR_I64 - - // --- description from .arch file --- - // D.i64 = signext(S0.i64) >> S1.u[5:0]; - // SCC = 1 if result is non-zero. - // The vacated bits are set to the sign bit of the input value. - void - Inst_SOP2__S_ASHR_I64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_BFM_B32 class methods --- - - Inst_SOP2__S_BFM_B32::Inst_SOP2__S_BFM_B32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_bfm_b32") - { - setFlag(ALU); - } // Inst_SOP2__S_BFM_B32 - - Inst_SOP2__S_BFM_B32::~Inst_SOP2__S_BFM_B32() - { - } // ~Inst_SOP2__S_BFM_B32 - - // --- description from .arch file --- - // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0] (bitfield mask). - void - Inst_SOP2__S_BFM_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - - src0.read(); - src1.read(); - - sdst = ((1 << bits(src0.rawData(), 4, 0)) - 1) - << bits(src1.rawData(), 4, 0); - - sdst.write(); - } // execute - // --- Inst_SOP2__S_BFM_B64 class methods --- - - Inst_SOP2__S_BFM_B64::Inst_SOP2__S_BFM_B64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_bfm_b64") - { - setFlag(ALU); - } // Inst_SOP2__S_BFM_B64 - - Inst_SOP2__S_BFM_B64::~Inst_SOP2__S_BFM_B64() - { - } // ~Inst_SOP2__S_BFM_B64 - - // --- description from .arch file --- - // D.u64 = ((1ULL << S0.u[5:0]) - 1) << S1.u[5:0] (bitfield mask). - void - Inst_SOP2__S_BFM_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src0.read(); - src1.read(); - - sdst = ((1ULL << bits(src0.rawData(), 5, 0)) - 1) - << bits(src1.rawData(), 5, 0); - - sdst.write(); - } // execute - // --- Inst_SOP2__S_MUL_I32 class methods --- - - Inst_SOP2__S_MUL_I32::Inst_SOP2__S_MUL_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_mul_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_MUL_I32 - - Inst_SOP2__S_MUL_I32::~Inst_SOP2__S_MUL_I32() - { - } // ~Inst_SOP2__S_MUL_I32 - - // --- description from .arch file --- - // D.i = S0.i * S1.i. 
- void - Inst_SOP2__S_MUL_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src0.read(); - src1.read(); - - sdst = src0.rawData() * src1.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP2__S_BFE_U32 class methods --- - - Inst_SOP2__S_BFE_U32::Inst_SOP2__S_BFE_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_bfe_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_BFE_U32 - - Inst_SOP2__S_BFE_U32::~Inst_SOP2__S_BFE_U32() - { - } // ~Inst_SOP2__S_BFE_U32 - - // --- description from .arch file --- - // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is - // field width. - // D.u = (S0.u>>S1.u[4:0]) & ((1<> bits(src1.rawData(), 4, 0)) - & ((1 << bits(src1.rawData(), 22, 16)) - 1); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_BFE_I32 class methods --- - - Inst_SOP2__S_BFE_I32::Inst_SOP2__S_BFE_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_bfe_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_BFE_I32 - - Inst_SOP2__S_BFE_I32::~Inst_SOP2__S_BFE_I32() - { - } // ~Inst_SOP2__S_BFE_I32 - - // --- description from .arch file --- - // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is - // field width. - // D.i = (S0.i>>S1.u[4:0]) & ((1<> bits(src1.rawData(), 4, 0)) - & ((1 << bits(src1.rawData(), 22, 16)) - 1); - - // Above extracted a signed int of size src1[22:16] bits which needs - // to be signed-extended. Check if the MSB of our src1[22:16]-bit - // integer is 1, and sign extend it is. - // - // Note: The description in the Vega ISA manual does not mention to - // sign-extend the result. 
An update description can be found in the - // more recent RDNA3 manual here: - // https://developer.amd.com/wp-content/resources/ - // RDNA3_Shader_ISA_December2022.pdf - if (sdst.rawData() >> (bits(src1.rawData(), 22, 16) - 1)) { - sdst = sdst.rawData() - | (0xffffffff << bits(src1.rawData(), 22, 16)); - } - - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_BFE_U64 class methods --- - - Inst_SOP2__S_BFE_U64::Inst_SOP2__S_BFE_U64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_bfe_u64") - { - setFlag(ALU); - } // Inst_SOP2__S_BFE_U64 - - Inst_SOP2__S_BFE_U64::~Inst_SOP2__S_BFE_U64() - { - } // ~Inst_SOP2__S_BFE_U64 - - // --- description from .arch file --- - // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is - // field width. - // D.u64 = (S0.u64>>S1.u[5:0]) & ((1<> bits(src1.rawData(), 5, 0)) - & ((1 << bits(src1.rawData(), 22, 16)) - 1); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_BFE_I64 class methods --- - - Inst_SOP2__S_BFE_I64::Inst_SOP2__S_BFE_I64(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_bfe_i64") - { - setFlag(ALU); - } // Inst_SOP2__S_BFE_I64 - - Inst_SOP2__S_BFE_I64::~Inst_SOP2__S_BFE_I64() - { - } // ~Inst_SOP2__S_BFE_I64 - - // --- description from .arch file --- - // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is - // field width. - // D.i64 = (S0.i64>>S1.u[5:0]) & ((1<> bits(src1.rawData(), 5, 0)) - & ((1 << bits(src1.rawData(), 22, 16)) - 1); - - // Above extracted a signed int of size src1[22:16] bits which needs - // to be signed-extended. Check if the MSB of our src1[22:16]-bit - // integer is 1, and sign extend it is. - if (sdst.rawData() >> (bits(src1.rawData(), 22, 16) - 1)) { - sdst = sdst.rawData() - | 0xffffffffffffffff << bits(src1.rawData(), 22, 16); - } - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_CBRANCH_G_FORK class methods --- - - Inst_SOP2__S_CBRANCH_G_FORK::Inst_SOP2__S_CBRANCH_G_FORK(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_cbranch_g_fork") - { - setFlag(Branch); - } // Inst_SOP2__S_CBRANCH_G_FORK - - Inst_SOP2__S_CBRANCH_G_FORK::~Inst_SOP2__S_CBRANCH_G_FORK() - { - } // ~Inst_SOP2__S_CBRANCH_G_FORK - - // --- description from .arch file --- - // mask_pass = S0.u64 & EXEC; - // mask_fail = ~S0.u64 & EXEC; - // if(mask_pass == EXEC) - // PC = S1.u64; - // elsif(mask_fail == EXEC) - // PC += 4; - // elsif(bitcount(mask_fail) < bitcount(mask_pass)) - // EXEC = mask_fail; - // SGPR[CSP*4] = { S1.u64, mask_pass }; - // CSP++; - // PC += 4; - // else - // EXEC = mask_pass; - // SGPR[CSP*4] = { PC + 4, mask_fail }; - // CSP++; - // PC = S1.u64; - // end. - // Conditional branch using branch-stack. - // S0 = compare mask(vcc or any sgpr) and - // S1 = 64-bit byte address of target instruction. - // See also S_CBRANCH_JOIN. - void - Inst_SOP2__S_CBRANCH_G_FORK::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOP2__S_ABSDIFF_I32 class methods --- - - Inst_SOP2__S_ABSDIFF_I32::Inst_SOP2__S_ABSDIFF_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_absdiff_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_ABSDIFF_I32 - - Inst_SOP2__S_ABSDIFF_I32::~Inst_SOP2__S_ABSDIFF_I32() - { - } // ~Inst_SOP2__S_ABSDIFF_I32 - - // --- description from .arch file --- - // D.i = S0.i - S1.i; - // if(D.i < 0) then D.i = -D.i; - // SCC = 1 if result is non-zero. - // Compute the absolute value of difference between two values. 
- void - Inst_SOP2__S_ABSDIFF_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - sdst = std::abs(src0.rawData() - src1.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP2__S_RFE_RESTORE_B64 class methods --- - - Inst_SOP2__S_RFE_RESTORE_B64::Inst_SOP2__S_RFE_RESTORE_B64( - InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_rfe_restore_b64") - { - } // Inst_SOP2__S_RFE_RESTORE_B64 - - Inst_SOP2__S_RFE_RESTORE_B64::~Inst_SOP2__S_RFE_RESTORE_B64() - { - } // ~Inst_SOP2__S_RFE_RESTORE_B64 - - // --- description from .arch file --- - // PRIV = 0; - // PC = S0.u64; - // INST_ATC = S1.u32[0]. - // Return from exception handler and continue, possibly changing the - // --- instruction ATC mode. - // This instruction may only be used within a trap handler. - // Use this instruction when the main program may be in a different memory - // --- space than the trap handler. 
- void - Inst_SOP2__S_RFE_RESTORE_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOP2__S_MUL_HI_U32 class methods --- - - Inst_SOP2__S_MUL_HI_U32::Inst_SOP2__S_MUL_HI_U32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_mul_hi_u32") - { - setFlag(ALU); - } // Inst_SOP2__S_MUL_HI_U32 - - Inst_SOP2__S_MUL_HI_U32::~Inst_SOP2__S_MUL_HI_U32() - { - } // ~Inst_SOP2__S_MUL_HI_U32 - - // --- description from .arch file --- - // D.u = (S0.u * S1.u) >> 32; - void - Inst_SOP2__S_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - - src0.read(); - src1.read(); - - VecElemU64 tmp_dst = - ((VecElemU64)src0.rawData() * (VecElemU64)src1.rawData()); - sdst = (tmp_dst >> 32); - - sdst.write(); - } // execute - // --- Inst_SOP2__S_MUL_HI_I32 class methods --- - - Inst_SOP2__S_MUL_HI_I32::Inst_SOP2__S_MUL_HI_I32(InFmt_SOP2 *iFmt) - : Inst_SOP2(iFmt, "s_mul_hi_i32") - { - setFlag(ALU); - } // Inst_SOP2__S_MUL_HI_I32 - - Inst_SOP2__S_MUL_HI_I32::~Inst_SOP2__S_MUL_HI_I32() - { - } // ~Inst_SOP2__S_MUL_HI_I32 - - // --- description from .arch file --- - // D.u = (S0.u * S1.u) >> 32; - void - Inst_SOP2__S_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src0.read(); - src1.read(); - - VecElemI64 tmp_src0 = - sext::digits>(src0.rawData()); - VecElemI64 tmp_src1 = - sext::digits>(src1.rawData()); - sdst = (VecElemI32)((tmp_src0 * tmp_src1) >> 32); - - sdst.write(); - } // execute - // --- Inst_SOPK__S_MOVK_I32 class methods --- - - Inst_SOPK__S_MOVK_I32::Inst_SOPK__S_MOVK_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_movk_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_MOVK_I32 - - 
Inst_SOPK__S_MOVK_I32::~Inst_SOPK__S_MOVK_I32() - { - } // ~Inst_SOPK__S_MOVK_I32 - - // --- description from .arch file --- - // D.i = signext(SIMM16) (sign extension). - void - Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - sdst = simm16; - - sdst.write(); - } // execute - // --- Inst_SOPK__S_CMOVK_I32 class methods --- - - Inst_SOPK__S_CMOVK_I32::Inst_SOPK__S_CMOVK_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmovk_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMOVK_I32 - - Inst_SOPK__S_CMOVK_I32::~Inst_SOPK__S_CMOVK_I32() - { - } // ~Inst_SOPK__S_CMOVK_I32 - - // --- description from .arch file --- - // if(SCC) then D.i = signext(SIMM16); - // else NOP. - // Conditional move with sign extension. - void - Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); - - scc.read(); - - if (scc.rawData()) { - sdst = simm16; - sdst.write(); - } - } // execute - // --- Inst_SOPK__S_CMPK_EQ_I32 class methods --- - - Inst_SOPK__S_CMPK_EQ_I32::Inst_SOPK__S_CMPK_EQ_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_eq_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_EQ_I32 - - Inst_SOPK__S_CMPK_EQ_I32::~Inst_SOPK__S_CMPK_EQ_I32() - { - } // ~Inst_SOPK__S_CMPK_EQ_I32 - - // --- description from .arch file --- - // SCC = (S0.i == signext(SIMM16)). - void - Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() == simm16) ? 
1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_LG_I32 class methods --- - - Inst_SOPK__S_CMPK_LG_I32::Inst_SOPK__S_CMPK_LG_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_lg_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LG_I32 - - Inst_SOPK__S_CMPK_LG_I32::~Inst_SOPK__S_CMPK_LG_I32() - { - } // ~Inst_SOPK__S_CMPK_LG_I32 - - // --- description from .arch file --- - // SCC = (S0.i != signext(SIMM16)). - void - Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() != simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_GT_I32 class methods --- - - Inst_SOPK__S_CMPK_GT_I32::Inst_SOPK__S_CMPK_GT_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_gt_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_GT_I32 - - Inst_SOPK__S_CMPK_GT_I32::~Inst_SOPK__S_CMPK_GT_I32() - { - } // ~Inst_SOPK__S_CMPK_GT_I32 - - // --- description from .arch file --- - // SCC = (S0.i > signext(SIMM16)). - void - Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() > simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_GE_I32 class methods --- - - Inst_SOPK__S_CMPK_GE_I32::Inst_SOPK__S_CMPK_GE_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_ge_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_GE_I32 - - Inst_SOPK__S_CMPK_GE_I32::~Inst_SOPK__S_CMPK_GE_I32() - { - } // ~Inst_SOPK__S_CMPK_GE_I32 - - // --- description from .arch file --- - // SCC = (S0.i >= signext(SIMM16)). 
- void - Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() >= simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_LT_I32 class methods --- - - Inst_SOPK__S_CMPK_LT_I32::Inst_SOPK__S_CMPK_LT_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_lt_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LT_I32 - - Inst_SOPK__S_CMPK_LT_I32::~Inst_SOPK__S_CMPK_LT_I32() - { - } // ~Inst_SOPK__S_CMPK_LT_I32 - - // --- description from .arch file --- - // SCC = (S0.i < signext(SIMM16)). - void - Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() < simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_LE_I32 class methods --- - - Inst_SOPK__S_CMPK_LE_I32::Inst_SOPK__S_CMPK_LE_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_le_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LE_I32 - - Inst_SOPK__S_CMPK_LE_I32::~Inst_SOPK__S_CMPK_LE_I32() - { - } // ~Inst_SOPK__S_CMPK_LE_I32 - - // --- description from .arch file --- - // SCC = (S0.i <= signext(SIMM16)). - void - Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() <= simm16) ? 
1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_EQ_U32 class methods --- - - Inst_SOPK__S_CMPK_EQ_U32::Inst_SOPK__S_CMPK_EQ_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_eq_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_EQ_U32 - - Inst_SOPK__S_CMPK_EQ_U32::~Inst_SOPK__S_CMPK_EQ_U32() - { - } // ~Inst_SOPK__S_CMPK_EQ_U32 - - // --- description from .arch file --- - // SCC = (S0.u == SIMM16). - void - Inst_SOPK__S_CMPK_EQ_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() == simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_LG_U32 class methods --- - - Inst_SOPK__S_CMPK_LG_U32::Inst_SOPK__S_CMPK_LG_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_lg_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LG_U32 - - Inst_SOPK__S_CMPK_LG_U32::~Inst_SOPK__S_CMPK_LG_U32() - { - } // ~Inst_SOPK__S_CMPK_LG_U32 - - // --- description from .arch file --- - // SCC = (S0.u != SIMM16). - void - Inst_SOPK__S_CMPK_LG_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() != simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_GT_U32 class methods --- - - Inst_SOPK__S_CMPK_GT_U32::Inst_SOPK__S_CMPK_GT_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_gt_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_GT_U32 - - Inst_SOPK__S_CMPK_GT_U32::~Inst_SOPK__S_CMPK_GT_U32() - { - } // ~Inst_SOPK__S_CMPK_GT_U32 - - // --- description from .arch file --- - // SCC = (S0.u > SIMM16). 
- void - Inst_SOPK__S_CMPK_GT_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() > simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_GE_U32 class methods --- - - Inst_SOPK__S_CMPK_GE_U32::Inst_SOPK__S_CMPK_GE_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_ge_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_GE_U32 - - Inst_SOPK__S_CMPK_GE_U32::~Inst_SOPK__S_CMPK_GE_U32() - { - } // ~Inst_SOPK__S_CMPK_GE_U32 - - // --- description from .arch file --- - // SCC = (S0.u >= SIMM16). - void - Inst_SOPK__S_CMPK_GE_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() >= simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_LT_U32 class methods --- - - Inst_SOPK__S_CMPK_LT_U32::Inst_SOPK__S_CMPK_LT_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_lt_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LT_U32 - - Inst_SOPK__S_CMPK_LT_U32::~Inst_SOPK__S_CMPK_LT_U32() - { - } // ~Inst_SOPK__S_CMPK_LT_U32 - - // --- description from .arch file --- - // SCC = (S0.u < SIMM16). - void - Inst_SOPK__S_CMPK_LT_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() < simm16) ? 
1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_CMPK_LE_U32 class methods --- - - Inst_SOPK__S_CMPK_LE_U32::Inst_SOPK__S_CMPK_LE_U32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cmpk_le_u32") - { - setFlag(ALU); - } // Inst_SOPK__S_CMPK_LE_U32 - - Inst_SOPK__S_CMPK_LE_U32::~Inst_SOPK__S_CMPK_LE_U32() - { - } // ~Inst_SOPK__S_CMPK_LE_U32 - - // --- description from .arch file --- - // SCC = (S0.u <= SIMM16). - void - Inst_SOPK__S_CMPK_LE_U32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16; - ConstScalarOperandU32 src(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - scc = (src.rawData() <= simm16) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPK__S_ADDK_I32 class methods --- - - Inst_SOPK__S_ADDK_I32::Inst_SOPK__S_ADDK_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_addk_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_ADDK_I32 - - Inst_SOPK__S_ADDK_I32::~Inst_SOPK__S_ADDK_I32() - { - } // ~Inst_SOPK__S_ADDK_I32 - - // --- description from .arch file --- - // D.i = D.i + signext(SIMM16); - // SCC = overflow. - void - Inst_SOPK__S_ADDK_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI16 simm16 = instData.SIMM16; - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16); - scc = (bits(src.rawData(), 31) == bits(simm16, 15) - && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOPK__S_MULK_I32 class methods --- - - Inst_SOPK__S_MULK_I32::Inst_SOPK__S_MULK_I32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_mulk_i32") - { - setFlag(ALU); - } // Inst_SOPK__S_MULK_I32 - - Inst_SOPK__S_MULK_I32::~Inst_SOPK__S_MULK_I32() - { - } // ~Inst_SOPK__S_MULK_I32 - - // --- description from .arch file --- - // D.i = D.i * signext(SIMM16). 
- void - Inst_SOPK__S_MULK_I32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI16 simm16 = instData.SIMM16; - ConstScalarOperandI32 src(gpuDynInst, instData.SDST); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = src.rawData() * (ScalarRegI32)sext<16>(simm16); - - sdst.write(); - } // execute - // --- Inst_SOPK__S_CBRANCH_I_FORK class methods --- - - Inst_SOPK__S_CBRANCH_I_FORK::Inst_SOPK__S_CBRANCH_I_FORK(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_cbranch_i_fork") - { - setFlag(Branch); - } // Inst_SOPK__S_CBRANCH_I_FORK - - Inst_SOPK__S_CBRANCH_I_FORK::~Inst_SOPK__S_CBRANCH_I_FORK() - { - } // ~Inst_SOPK__S_CBRANCH_I_FORK - - // --- description from .arch file --- - // mask_pass = S0.u64 & EXEC; - // mask_fail = ~S0.u64 & EXEC; - // target_addr = PC + signext(SIMM16 * 4) + 4; - // if(mask_pass == EXEC) - // PC = target_addr; - // elsif(mask_fail == EXEC) - // PC += 4; - // elsif(bitcount(mask_fail) < bitcount(mask_pass)) - // EXEC = mask_fail; - // SGPR[CSP*4] = { target_addr, mask_pass }; - // CSP++; - // PC += 4; - // else - // EXEC = mask_pass; - // SGPR[CSP*4] = { PC + 4, mask_fail }; - // CSP++; - // PC = target_addr; - // end. - // Conditional branch using branch-stack. - // S0 = compare mask(vcc or any sgpr), and - // SIMM16 = signed DWORD branch offset relative to next instruction. - // See also S_CBRANCH_JOIN. - void - Inst_SOPK__S_CBRANCH_I_FORK::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPK__S_GETREG_B32 class methods --- - - Inst_SOPK__S_GETREG_B32::Inst_SOPK__S_GETREG_B32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_getreg_b32") - { - setFlag(ALU); - } // Inst_SOPK__S_GETREG_B32 - - Inst_SOPK__S_GETREG_B32::~Inst_SOPK__S_GETREG_B32() - { - } // ~Inst_SOPK__S_GETREG_B32 - - // --- description from .arch file --- - // D.u = hardware-reg. Read some or all of a hardware register into the - // LSBs of D. 
- // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size - // is 1..32. - void - Inst_SOPK__S_GETREG_B32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI16 simm16 = instData.SIMM16; - ScalarRegU32 hwregId = simm16 & 0x3f; - ScalarRegU32 offset = (simm16 >> 6) & 31; - ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; - - ScalarRegU32 hwreg = - gpuDynInst->computeUnit()->shader->getHwReg(hwregId); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - sdst.read(); - - // Store value from hardware to part of the SDST. - ScalarRegU32 mask = (((1U << size) - 1U) << offset); - sdst = (hwreg & mask) >> offset; - sdst.write(); - } // execute - // --- Inst_SOPK__S_SETREG_B32 class methods --- - - Inst_SOPK__S_SETREG_B32::Inst_SOPK__S_SETREG_B32(InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_setreg_b32") - { - setFlag(ALU); - } // Inst_SOPK__S_SETREG_B32 - - Inst_SOPK__S_SETREG_B32::~Inst_SOPK__S_SETREG_B32() - { - } // ~Inst_SOPK__S_SETREG_B32 - - // --- description from .arch file --- - // hardware-reg = S0.u. Write some or all of the LSBs of D into a hardware - // register. - // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size - // is 1..32. - void - Inst_SOPK__S_SETREG_B32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI16 simm16 = instData.SIMM16; - ScalarRegU32 hwregId = simm16 & 0x3f; - ScalarRegU32 offset = (simm16 >> 6) & 31; - ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; - - ScalarRegU32 hwreg = - gpuDynInst->computeUnit()->shader->getHwReg(hwregId); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - sdst.read(); - - // Store value from SDST to part of the hardware register. 
- ScalarRegU32 mask = (((1U << size) - 1U) << offset); - hwreg = ((hwreg & ~mask) | ((sdst.rawData() << offset) & mask)); - gpuDynInst->computeUnit()->shader->setHwReg(hwregId, hwreg); - - // set MODE register to control the behavior of single precision - // floating-point numbers: denormal mode or round mode - if (hwregId==1 && size==2 - && (offset==4 || offset==0)) { - warn_once("Be cautious that s_setreg_b32 has no real effect " - "on FP modes: %s\n", gpuDynInst->disassemble()); - return; - } - - // panic if not changing MODE of floating-point numbers - panicUnimplemented(); - } // execute - // --- Inst_SOPK__S_SETREG_IMM32_B32 class methods --- - - Inst_SOPK__S_SETREG_IMM32_B32::Inst_SOPK__S_SETREG_IMM32_B32( - InFmt_SOPK *iFmt) - : Inst_SOPK(iFmt, "s_setreg_imm32_b32") - { - setFlag(ALU); - } // Inst_SOPK__S_SETREG_IMM32_B32 - - Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32() - { - } // ~Inst_SOPK__S_SETREG_IMM32_B32 - - // --- description from .arch file --- - // Write some or all of the LSBs of IMM32 into a hardware register; this - // --- instruction requires a 32-bit literal constant. - // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size - // is 1..32. - void - Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst) - { - ScalarRegI16 simm16 = instData.SIMM16; - ScalarRegU32 hwregId = simm16 & 0x3f; - ScalarRegU32 offset = (simm16 >> 6) & 31; - ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; - - ScalarRegU32 hwreg = - gpuDynInst->computeUnit()->shader->getHwReg(hwregId); - ScalarRegI32 simm32 = extData.imm_u32; - - // Store value from SIMM32 to part of the hardware register. 
- ScalarRegU32 mask = (((1U << size) - 1U) << offset); - hwreg = ((hwreg & ~mask) | ((simm32 << offset) & mask)); - gpuDynInst->computeUnit()->shader->setHwReg(hwregId, hwreg); - - // set MODE register to control the behavior of single precision - // floating-point numbers: denormal mode or round mode - if (hwregId==HW_REG_MODE && size==2 - && (offset==4 || offset==0)) { - warn_once("Be cautious that s_setreg_imm32_b32 has no real effect " - "on FP modes: %s\n", gpuDynInst->disassemble()); - return; - } - - // panic if not changing modes of single-precision FPs - panicUnimplemented(); - } // execute - // --- Inst_SOP1__S_MOV_B32 class methods --- - - Inst_SOP1__S_MOV_B32::Inst_SOP1__S_MOV_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_mov_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_MOV_B32 - - Inst_SOP1__S_MOV_B32::~Inst_SOP1__S_MOV_B32() - { - } // ~Inst_SOP1__S_MOV_B32 - - // --- description from .arch file --- - // D.u = S0.u. - void - Inst_SOP1__S_MOV_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = src.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_MOV_B64 class methods --- - - Inst_SOP1__S_MOV_B64::Inst_SOP1__S_MOV_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_mov_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_MOV_B64 - - Inst_SOP1__S_MOV_B64::~Inst_SOP1__S_MOV_B64() - { - } // ~Inst_SOP1__S_MOV_B64 - - // --- description from .arch file --- - // D.u64 = S0.u64. 
- void - Inst_SOP1__S_MOV_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = src.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_CMOV_B32 class methods --- - - Inst_SOP1__S_CMOV_B32::Inst_SOP1__S_CMOV_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_cmov_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_CMOV_B32 - - Inst_SOP1__S_CMOV_B32::~Inst_SOP1__S_CMOV_B32() - { - } // ~Inst_SOP1__S_CMOV_B32 - - // --- description from .arch file --- - // (SCC) then D.u = S0.u; - // else NOP. - // Conditional move. - void - Inst_SOP1__S_CMOV_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - scc.read(); - - if (scc.rawData()) { - sdst = src.rawData(); - sdst.write(); - } - } // execute - // --- Inst_SOP1__S_CMOV_B64 class methods --- - - Inst_SOP1__S_CMOV_B64::Inst_SOP1__S_CMOV_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_cmov_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_CMOV_B64 - - Inst_SOP1__S_CMOV_B64::~Inst_SOP1__S_CMOV_B64() - { - } // ~Inst_SOP1__S_CMOV_B64 - - // --- description from .arch file --- - // if(SCC) then D.u64 = S0.u64; - // else NOP. - // Conditional move. 
- void - Inst_SOP1__S_CMOV_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - scc.read(); - - if (scc.rawData()) { - sdst = src.rawData(); - sdst.write(); - } - } // execute - // --- Inst_SOP1__S_NOT_B32 class methods --- - - Inst_SOP1__S_NOT_B32::Inst_SOP1__S_NOT_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_not_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_NOT_B32 - - Inst_SOP1__S_NOT_B32::~Inst_SOP1__S_NOT_B32() - { - } // ~Inst_SOP1__S_NOT_B32 - - // --- description from .arch file --- - // D.u = ~S0.u; - // SCC = 1 if result is non-zero. - // Bitwise negation. - void - Inst_SOP1__S_NOT_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = ~src.rawData(); - - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_NOT_B64 class methods --- - - Inst_SOP1__S_NOT_B64::Inst_SOP1__S_NOT_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_not_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_NOT_B64 - - Inst_SOP1__S_NOT_B64::~Inst_SOP1__S_NOT_B64() - { - } // ~Inst_SOP1__S_NOT_B64 - - // --- description from .arch file --- - // D.u64 = ~S0.u64; - // SCC = 1 if result is non-zero. - // Bitwise negation. - void - Inst_SOP1__S_NOT_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = ~src.rawData(); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_WQM_B32 class methods --- - - Inst_SOP1__S_WQM_B32::Inst_SOP1__S_WQM_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_wqm_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_WQM_B32 - - Inst_SOP1__S_WQM_B32::~Inst_SOP1__S_WQM_B32() - { - } // ~Inst_SOP1__S_WQM_B32 - - // --- description from .arch file --- - // D[i] = (S0[(i & ~3):(i | 3)] != 0); - // Computes whole quad mode for an active/valid mask. - // SCC = 1 if result is non-zero. - void - Inst_SOP1__S_WQM_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wholeQuadMode(src.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_WQM_B64 class methods --- - - Inst_SOP1__S_WQM_B64::Inst_SOP1__S_WQM_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_wqm_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_WQM_B64 - - Inst_SOP1__S_WQM_B64::~Inst_SOP1__S_WQM_B64() - { - } // ~Inst_SOP1__S_WQM_B64 - - // --- description from .arch file --- - // D[i] = (S0[(i & ~3):(i | 3)] != 0); - // Computes whole quad mode for an active/valid mask. - // SCC = 1 if result is non-zero. - void - Inst_SOP1__S_WQM_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wholeQuadMode(src.rawData()); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_BREV_B32 class methods --- - - Inst_SOP1__S_BREV_B32::Inst_SOP1__S_BREV_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_brev_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_BREV_B32 - - Inst_SOP1__S_BREV_B32::~Inst_SOP1__S_BREV_B32() - { - } // ~Inst_SOP1__S_BREV_B32 - - // --- description from .arch file --- - // D.u[31:0] = S0.u[0:31] (reverse bits). - void - Inst_SOP1__S_BREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = reverseBits(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_BREV_B64 class methods --- - - Inst_SOP1__S_BREV_B64::Inst_SOP1__S_BREV_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_brev_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_BREV_B64 - - Inst_SOP1__S_BREV_B64::~Inst_SOP1__S_BREV_B64() - { - } // ~Inst_SOP1__S_BREV_B64 - - // --- description from .arch file --- - // D.u64[63:0] = S0.u64[0:63] (reverse bits). - void - Inst_SOP1__S_BREV_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = reverseBits(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_BCNT0_I32_B32 class methods --- - - Inst_SOP1__S_BCNT0_I32_B32::Inst_SOP1__S_BCNT0_I32_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_bcnt0_i32_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_BCNT0_I32_B32 - - Inst_SOP1__S_BCNT0_I32_B32::~Inst_SOP1__S_BCNT0_I32_B32() - { - } // ~Inst_SOP1__S_BCNT0_I32_B32 - - // --- description from .arch file --- - // D.i = CountZeroBits(S0.u); - // SCC = 1 if result is non-zero. 
- void - Inst_SOP1__S_BCNT0_I32_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = countZeroBits(src.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_BCNT0_I32_B64 class methods --- - - Inst_SOP1__S_BCNT0_I32_B64::Inst_SOP1__S_BCNT0_I32_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_bcnt0_i32_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_BCNT0_I32_B64 - - Inst_SOP1__S_BCNT0_I32_B64::~Inst_SOP1__S_BCNT0_I32_B64() - { - } // ~Inst_SOP1__S_BCNT0_I32_B64 - - // --- description from .arch file --- - // D.i = CountZeroBits(S0.u64); - // SCC = 1 if result is non-zero. - void - Inst_SOP1__S_BCNT0_I32_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = countZeroBits(src.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_BCNT1_I32_B32 class methods --- - - Inst_SOP1__S_BCNT1_I32_B32::Inst_SOP1__S_BCNT1_I32_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_bcnt1_i32_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_BCNT1_I32_B32 - - Inst_SOP1__S_BCNT1_I32_B32::~Inst_SOP1__S_BCNT1_I32_B32() - { - } // ~Inst_SOP1__S_BCNT1_I32_B32 - - // --- description from .arch file --- - // D.i = CountOneBits(S0.u); - // SCC = 1 if result is non-zero. - void - Inst_SOP1__S_BCNT1_I32_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = popCount(src.rawData()); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_BCNT1_I32_B64 class methods --- - - Inst_SOP1__S_BCNT1_I32_B64::Inst_SOP1__S_BCNT1_I32_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_bcnt1_i32_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_BCNT1_I32_B64 - - Inst_SOP1__S_BCNT1_I32_B64::~Inst_SOP1__S_BCNT1_I32_B64() - { - } // ~Inst_SOP1__S_BCNT1_I32_B64 - - // --- description from .arch file --- - // D.i = CountOneBits(S0.u64); - // SCC = 1 if result is non-zero. - void - Inst_SOP1__S_BCNT1_I32_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = popCount(src.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_FF0_I32_B32 class methods --- - - Inst_SOP1__S_FF0_I32_B32::Inst_SOP1__S_FF0_I32_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_ff0_i32_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_FF0_I32_B32 - - Inst_SOP1__S_FF0_I32_B32::~Inst_SOP1__S_FF0_I32_B32() - { - } // ~Inst_SOP1__S_FF0_I32_B32 - - // --- description from .arch file --- - // D.i = FindFirstZero(S0.u); - // If no zeros are found, return -1. - // Returns the bit position of the first zero from the LSB. 
- void - Inst_SOP1__S_FF0_I32_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = findFirstZero(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_FF0_I32_B64 class methods --- - - Inst_SOP1__S_FF0_I32_B64::Inst_SOP1__S_FF0_I32_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_ff0_i32_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_FF0_I32_B64 - - Inst_SOP1__S_FF0_I32_B64::~Inst_SOP1__S_FF0_I32_B64() - { - } // ~Inst_SOP1__S_FF0_I32_B64 - - // --- description from .arch file --- - // D.i = FindFirstZero(S0.u64); - // If no zeros are found, return -1. - // Returns the bit position of the first zero from the LSB. - void - Inst_SOP1__S_FF0_I32_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = findFirstZero(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_FF1_I32_B32 class methods --- - - Inst_SOP1__S_FF1_I32_B32::Inst_SOP1__S_FF1_I32_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_ff1_i32_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_FF1_I32_B32 - - Inst_SOP1__S_FF1_I32_B32::~Inst_SOP1__S_FF1_I32_B32() - { - } // ~Inst_SOP1__S_FF1_I32_B32 - - // --- description from .arch file --- - // D.i = FindFirstOne(S0.u); - // If no ones are found, return -1. - // Returns the bit position of the first one from the LSB. 
- void - Inst_SOP1__S_FF1_I32_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = findFirstOne(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_FF1_I32_B64 class methods --- - - Inst_SOP1__S_FF1_I32_B64::Inst_SOP1__S_FF1_I32_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_ff1_i32_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_FF1_I32_B64 - - Inst_SOP1__S_FF1_I32_B64::~Inst_SOP1__S_FF1_I32_B64() - { - } // ~Inst_SOP1__S_FF1_I32_B64 - - // --- description from .arch file --- - // D.i = FindFirstOne(S0.u64); - // If no ones are found, return -1. - // Returns the bit position of the first one from the LSB. - void - Inst_SOP1__S_FF1_I32_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = findFirstOne(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_FLBIT_I32_B32 class methods --- - - Inst_SOP1__S_FLBIT_I32_B32::Inst_SOP1__S_FLBIT_I32_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_flbit_i32_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_FLBIT_I32_B32 - - Inst_SOP1__S_FLBIT_I32_B32::~Inst_SOP1__S_FLBIT_I32_B32() - { - } // ~Inst_SOP1__S_FLBIT_I32_B32 - - // --- description from .arch file --- - // D.i = FindFirstOne(S0.u); - // If no ones are found, return -1. - // Counts how many zeros before the first one starting from the MSB. 
- void - Inst_SOP1__S_FLBIT_I32_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = countZeroBitsMsb(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_FLBIT_I32_B64 class methods --- - - Inst_SOP1__S_FLBIT_I32_B64::Inst_SOP1__S_FLBIT_I32_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_flbit_i32_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_FLBIT_I32_B64 - - Inst_SOP1__S_FLBIT_I32_B64::~Inst_SOP1__S_FLBIT_I32_B64() - { - } // ~Inst_SOP1__S_FLBIT_I32_B64 - - // --- description from .arch file --- - // D.i = FindFirstOne(S0.u64); - // If no ones are found, return -1. - // Counts how many zeros before the first one starting from the MSB. - void - Inst_SOP1__S_FLBIT_I32_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = countZeroBitsMsb(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_FLBIT_I32 class methods --- - - Inst_SOP1__S_FLBIT_I32::Inst_SOP1__S_FLBIT_I32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_flbit_i32") - { - setFlag(ALU); - } // Inst_SOP1__S_FLBIT_I32 - - Inst_SOP1__S_FLBIT_I32::~Inst_SOP1__S_FLBIT_I32() - { - } // ~Inst_SOP1__S_FLBIT_I32 - - // --- description from .arch file --- - // D.i = FirstOppositeSignBit(S0.i); - // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1. - // Counts how many bits in a row (from MSB to LSB) are the same as the - // sign bit. 
- void - Inst_SOP1__S_FLBIT_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = firstOppositeSignBit(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_FLBIT_I32_I64 class methods --- - - Inst_SOP1__S_FLBIT_I32_I64::Inst_SOP1__S_FLBIT_I32_I64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_flbit_i32_i64") - { - setFlag(ALU); - } // Inst_SOP1__S_FLBIT_I32_I64 - - Inst_SOP1__S_FLBIT_I32_I64::~Inst_SOP1__S_FLBIT_I32_I64() - { - } // ~Inst_SOP1__S_FLBIT_I32_I64 - - // --- description from .arch file --- - // D.i = FirstOppositeSignBit(S0.i64); - // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1. - // Counts how many bits in a row (from MSB to LSB) are the same as the - // sign bit. - void - Inst_SOP1__S_FLBIT_I32_I64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI64 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = firstOppositeSignBit(src.rawData()); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_SEXT_I32_I8 class methods --- - - Inst_SOP1__S_SEXT_I32_I8::Inst_SOP1__S_SEXT_I32_I8(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_sext_i32_i8") - { - setFlag(ALU); - } // Inst_SOP1__S_SEXT_I32_I8 - - Inst_SOP1__S_SEXT_I32_I8::~Inst_SOP1__S_SEXT_I32_I8() - { - } // ~Inst_SOP1__S_SEXT_I32_I8 - - // --- description from .arch file --- - // D.i = signext(S0.i[7:0]) (sign extension). 
- void - Inst_SOP1__S_SEXT_I32_I8::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = sext::digits>( - bits(src.rawData(), 7, 0)); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_SEXT_I32_I16 class methods --- - - Inst_SOP1__S_SEXT_I32_I16::Inst_SOP1__S_SEXT_I32_I16(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_sext_i32_i16") - { - setFlag(ALU); - } // Inst_SOP1__S_SEXT_I32_I16 - - Inst_SOP1__S_SEXT_I32_I16::~Inst_SOP1__S_SEXT_I32_I16() - { - } // ~Inst_SOP1__S_SEXT_I32_I16 - - // --- description from .arch file --- - // D.i = signext(S0.i[15:0]) (sign extension). - void - Inst_SOP1__S_SEXT_I32_I16::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = sext::digits>( - bits(src.rawData(), 15, 0)); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_BITSET0_B32 class methods --- - - Inst_SOP1__S_BITSET0_B32::Inst_SOP1__S_BITSET0_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_bitset0_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_BITSET0_B32 - - Inst_SOP1__S_BITSET0_B32::~Inst_SOP1__S_BITSET0_B32() - { - } // ~Inst_SOP1__S_BITSET0_B32 - - // --- description from .arch file --- - // D.u[S0.u[4:0]] = 0. 
- void - Inst_SOP1__S_BITSET0_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst.setBit(bits(src.rawData(), 4, 0), 0); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_BITSET0_B64 class methods --- - - Inst_SOP1__S_BITSET0_B64::Inst_SOP1__S_BITSET0_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_bitset0_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_BITSET0_B64 - - Inst_SOP1__S_BITSET0_B64::~Inst_SOP1__S_BITSET0_B64() - { - } // ~Inst_SOP1__S_BITSET0_B64 - - // --- description from .arch file --- - // D.u64[S0.u[5:0]] = 0. - void - Inst_SOP1__S_BITSET0_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst.setBit(bits(src.rawData(), 5, 0), 0); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_BITSET1_B32 class methods --- - - Inst_SOP1__S_BITSET1_B32::Inst_SOP1__S_BITSET1_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_bitset1_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_BITSET1_B32 - - Inst_SOP1__S_BITSET1_B32::~Inst_SOP1__S_BITSET1_B32() - { - } // ~Inst_SOP1__S_BITSET1_B32 - - // --- description from .arch file --- - // D.u[S0.u[4:0]] = 1. - void - Inst_SOP1__S_BITSET1_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst.setBit(bits(src.rawData(), 4, 0), 1); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_BITSET1_B64 class methods --- - - Inst_SOP1__S_BITSET1_B64::Inst_SOP1__S_BITSET1_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_bitset1_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_BITSET1_B64 - - Inst_SOP1__S_BITSET1_B64::~Inst_SOP1__S_BITSET1_B64() - { - } // ~Inst_SOP1__S_BITSET1_B64 - - // --- description from .arch file --- - // D.u64[S0.u[5:0]] = 1. 
- void - Inst_SOP1__S_BITSET1_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst.setBit(bits(src.rawData(), 5, 0), 1); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_GETPC_B64 class methods --- - - Inst_SOP1__S_GETPC_B64::Inst_SOP1__S_GETPC_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_getpc_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_GETPC_B64 - - Inst_SOP1__S_GETPC_B64::~Inst_SOP1__S_GETPC_B64() - { - } // ~Inst_SOP1__S_GETPC_B64 - - // --- description from .arch file --- - // D.u64 = PC + 4. - // Destination receives the byte address of the next instruction. - void - Inst_SOP1__S_GETPC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Addr pc = gpuDynInst->pc(); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - sdst = pc + 4; - - sdst.write(); - } // execute - // --- Inst_SOP1__S_SETPC_B64 class methods --- - - Inst_SOP1__S_SETPC_B64::Inst_SOP1__S_SETPC_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_setpc_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_SETPC_B64 - - Inst_SOP1__S_SETPC_B64::~Inst_SOP1__S_SETPC_B64() - { - } // ~Inst_SOP1__S_SETPC_B64 - - // --- description from .arch file --- - // PC = S0.u64. - // S0.u64 is a byte address of the instruction to jump to. - void - Inst_SOP1__S_SETPC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - - src.read(); - - wf->pc(src.rawData()); - } // execute - // --- Inst_SOP1__S_SWAPPC_B64 class methods --- - - Inst_SOP1__S_SWAPPC_B64::Inst_SOP1__S_SWAPPC_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_swappc_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_SWAPPC_B64 - - Inst_SOP1__S_SWAPPC_B64::~Inst_SOP1__S_SWAPPC_B64() - { - } // ~Inst_SOP1__S_SWAPPC_B64 - - // --- description from .arch file --- - // D.u64 = PC + 4; PC = S0.u64. - // S0.u64 is a byte address of the instruction to jump to. 
- void - Inst_SOP1__S_SWAPPC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - Addr pc = gpuDynInst->pc(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = pc + 4; - - wf->pc(src.rawData()); - sdst.write(); - } // execute - // --- Inst_SOP1__S_RFE_B64 class methods --- - - Inst_SOP1__S_RFE_B64::Inst_SOP1__S_RFE_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_rfe_b64") - { - } // Inst_SOP1__S_RFE_B64 - - Inst_SOP1__S_RFE_B64::~Inst_SOP1__S_RFE_B64() - { - } // ~Inst_SOP1__S_RFE_B64 - - // --- description from .arch file --- - // PRIV = 0; - // PC = S0.u64. - // Return from exception handler and continue. - // This instruction may only be used within a trap handler. - void - Inst_SOP1__S_RFE_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOP1__S_AND_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_AND_SAVEEXEC_B64::Inst_SOP1__S_AND_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_and_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_AND_SAVEEXEC_B64 - - Inst_SOP1__S_AND_SAVEEXEC_B64::~Inst_SOP1__S_AND_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_AND_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = S0.u64 & EXEC; - // SCC = 1 if the new value of EXEC is non-zero. - void - Inst_SOP1__S_AND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = src.rawData() & wf->execMask().to_ullong(); - scc = wf->execMask().any() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_OR_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_OR_SAVEEXEC_B64::Inst_SOP1__S_OR_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_or_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_OR_SAVEEXEC_B64 - - Inst_SOP1__S_OR_SAVEEXEC_B64::~Inst_SOP1__S_OR_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_OR_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = S0.u64 | EXEC; - // SCC = 1 if the new value of EXEC is non-zero. - void - Inst_SOP1__S_OR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = src.rawData() | wf->execMask().to_ullong(); - scc = wf->execMask().any() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_XOR_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_XOR_SAVEEXEC_B64::Inst_SOP1__S_XOR_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_xor_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_XOR_SAVEEXEC_B64 - - Inst_SOP1__S_XOR_SAVEEXEC_B64::~Inst_SOP1__S_XOR_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_XOR_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = S0.u64 ^ EXEC; - // SCC = 1 if the new value of EXEC is non-zero. 
- void - Inst_SOP1__S_XOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = src.rawData() ^ wf->execMask().to_ullong(); - scc = wf->execMask().any() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_ANDN2_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_ANDN2_SAVEEXEC_B64::Inst_SOP1__S_ANDN2_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_andn2_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_ANDN2_SAVEEXEC_B64 - - Inst_SOP1__S_ANDN2_SAVEEXEC_B64::~Inst_SOP1__S_ANDN2_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_ANDN2_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = S0.u64 & ~EXEC; - // SCC = 1 if the new value of EXEC is non-zero. - void - Inst_SOP1__S_ANDN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = src.rawData() &~ wf->execMask().to_ullong(); - scc = wf->execMask().any() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_ORN2_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_ORN2_SAVEEXEC_B64::Inst_SOP1__S_ORN2_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_orn2_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_ORN2_SAVEEXEC_B64 - - Inst_SOP1__S_ORN2_SAVEEXEC_B64::~Inst_SOP1__S_ORN2_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_ORN2_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = S0.u64 | ~EXEC; - // SCC = 1 if the new value of EXEC is non-zero. - void - Inst_SOP1__S_ORN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = src.rawData() |~ wf->execMask().to_ullong(); - scc = wf->execMask().any() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_NAND_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_NAND_SAVEEXEC_B64::Inst_SOP1__S_NAND_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_nand_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_NAND_SAVEEXEC_B64 - - Inst_SOP1__S_NAND_SAVEEXEC_B64::~Inst_SOP1__S_NAND_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_NAND_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = ~(S0.u64 & EXEC); - // SCC = 1 if the new value of EXEC is non-zero. 
- void - Inst_SOP1__S_NAND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = ~(src.rawData() & wf->execMask().to_ullong()); - scc = wf->execMask().any() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_NOR_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_NOR_SAVEEXEC_B64::Inst_SOP1__S_NOR_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_nor_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_NOR_SAVEEXEC_B64 - - Inst_SOP1__S_NOR_SAVEEXEC_B64::~Inst_SOP1__S_NOR_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_NOR_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = ~(S0.u64 | EXEC); - // SCC = 1 if the new value of EXEC is non-zero. - void - Inst_SOP1__S_NOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = ~(src.rawData() | wf->execMask().to_ullong()); - scc = wf->execMask().any() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_XNOR_SAVEEXEC_B64 class methods --- - - Inst_SOP1__S_XNOR_SAVEEXEC_B64::Inst_SOP1__S_XNOR_SAVEEXEC_B64( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_xnor_saveexec_b64") - { - setFlag(ALU); - setFlag(ReadsEXEC); - setFlag(WritesEXEC); - } // Inst_SOP1__S_XNOR_SAVEEXEC_B64 - - Inst_SOP1__S_XNOR_SAVEEXEC_B64::~Inst_SOP1__S_XNOR_SAVEEXEC_B64() - { - } // ~Inst_SOP1__S_XNOR_SAVEEXEC_B64 - - // --- description from .arch file --- - // D.u64 = EXEC; - // EXEC = ~(S0.u64 ^ EXEC); - // SCC = 1 if the new value of EXEC is non-zero. - void - Inst_SOP1__S_XNOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = wf->execMask().to_ullong(); - wf->execMask() = ~(src.rawData() ^ wf->execMask().to_ullong()); - scc = wf->execMask().any() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_QUADMASK_B32 class methods --- - - Inst_SOP1__S_QUADMASK_B32::Inst_SOP1__S_QUADMASK_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_quadmask_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_QUADMASK_B32 - - Inst_SOP1__S_QUADMASK_B32::~Inst_SOP1__S_QUADMASK_B32() - { - } // ~Inst_SOP1__S_QUADMASK_B32 - - // --- description from .arch file --- - // D.u = QuadMask(S0.u): - // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[31:8] = 0; - // SCC = 1 if result is non-zero. - void - Inst_SOP1__S_QUADMASK_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = quadMask(src.rawData()); - scc = sdst.rawData() ? 
1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_QUADMASK_B64 class methods --- - - Inst_SOP1__S_QUADMASK_B64::Inst_SOP1__S_QUADMASK_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_quadmask_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_QUADMASK_B64 - - Inst_SOP1__S_QUADMASK_B64::~Inst_SOP1__S_QUADMASK_B64() - { - } // ~Inst_SOP1__S_QUADMASK_B64 - - // --- description from .arch file --- - // D.u64 = QuadMask(S0.u64): - // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[63:16] = 0; - // SCC = 1 if result is non-zero. - void - Inst_SOP1__S_QUADMASK_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = quadMask(src.rawData()); - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_MOVRELS_B32 class methods --- - - Inst_SOP1__S_MOVRELS_B32::Inst_SOP1__S_MOVRELS_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_movrels_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_MOVRELS_B32 - - Inst_SOP1__S_MOVRELS_B32::~Inst_SOP1__S_MOVRELS_B32() - { - } // ~Inst_SOP1__S_MOVRELS_B32 - - // --- description from .arch file --- - // D.u = SGPR[S0.u + M0.u].u (move from relative source). 
- void - Inst_SOP1__S_MOVRELS_B32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 m0(gpuDynInst, REG_M0); - m0.read(); - ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0 + m0.rawData()); - ScalarOperandU32 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = src.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_MOVRELS_B64 class methods --- - - Inst_SOP1__S_MOVRELS_B64::Inst_SOP1__S_MOVRELS_B64(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_movrels_b64") - { - setFlag(ALU); - } // Inst_SOP1__S_MOVRELS_B64 - - Inst_SOP1__S_MOVRELS_B64::~Inst_SOP1__S_MOVRELS_B64() - { - } // ~Inst_SOP1__S_MOVRELS_B64 - - // --- description from .arch file --- - // D.u64 = SGPR[S0.u + M0.u].u64 (move from relative source). - // The index in M0.u must be even for this operation. - void - Inst_SOP1__S_MOVRELS_B64::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 m0(gpuDynInst, REG_M0); - m0.read(); - ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0 + m0.rawData()); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src.read(); - - sdst = src.rawData(); - - sdst.write(); - } // execute - // --- Inst_SOP1__S_MOVRELD_B32 class methods --- - - Inst_SOP1__S_MOVRELD_B32::Inst_SOP1__S_MOVRELD_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_movreld_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_MOVRELD_B32 - - Inst_SOP1__S_MOVRELD_B32::~Inst_SOP1__S_MOVRELD_B32() - { - } // ~Inst_SOP1__S_MOVRELD_B32 - - // --- description from .arch file --- - // SGPR[D.u + M0.u].u = S0.u (move to relative destination). 
    void
    Inst_SOP1__S_MOVRELD_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Relative-destination move: here M0 offsets the DESTINATION index
        // (SDST + M0), unlike s_movrels which offsets the source.
        ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
        m0.read();
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST + m0.rawData());

        src.read();

        sdst = src.rawData();

        sdst.write();
    } // execute
    // --- Inst_SOP1__S_MOVRELD_B64 class methods ---

    Inst_SOP1__S_MOVRELD_B64::Inst_SOP1__S_MOVRELD_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_movreld_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_MOVRELD_B64

    Inst_SOP1__S_MOVRELD_B64::~Inst_SOP1__S_MOVRELD_B64()
    {
    } // ~Inst_SOP1__S_MOVRELD_B64

    // --- description from .arch file ---
    // SGPR[D.u + M0.u].u64 = S0.u64 (move to relative destination).
    // The index in M0.u must be even for this operation.
    void
    Inst_SOP1__S_MOVRELD_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // 64-bit relative-destination move.  NOTE(review): the even-index
        // requirement on M0 is not enforced here; confirm whether the
        // operand classes assert it.
        ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
        m0.read();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST + m0.rawData());

        src.read();

        sdst = src.rawData();

        sdst.write();
    } // execute
    // --- Inst_SOP1__S_CBRANCH_JOIN class methods ---

    Inst_SOP1__S_CBRANCH_JOIN::Inst_SOP1__S_CBRANCH_JOIN(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_cbranch_join")
    {
        setFlag(Branch);
        setFlag(WritesEXEC);
    } // Inst_SOP1__S_CBRANCH_JOIN

    Inst_SOP1__S_CBRANCH_JOIN::~Inst_SOP1__S_CBRANCH_JOIN()
    {
    } // ~Inst_SOP1__S_CBRANCH_JOIN

    // --- description from .arch file ---
    // saved_csp = S0.u;
    // if(CSP == saved_csp) then
    // PC += 4; // Second time to JOIN: continue with program.
    // else
    // CSP -= 1; // First time to JOIN; jump to other FORK path.
    // {PC, EXEC} = SGPR[CSP * 4]; // Read 128 bits from 4 consecutive
    // SGPRs.
    // end
    // Conditional branch join point (end of conditional branch block). S0 is
    // saved CSP value.
    // See S_CBRANCH_G_FORK and S_CBRANCH_I_FORK for related instructions.
- void - Inst_SOP1__S_CBRANCH_JOIN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOP1__S_ABS_I32 class methods --- - - Inst_SOP1__S_ABS_I32::Inst_SOP1__S_ABS_I32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_abs_i32") - { - setFlag(ALU); - } // Inst_SOP1__S_ABS_I32 - - Inst_SOP1__S_ABS_I32::~Inst_SOP1__S_ABS_I32() - { - } // ~Inst_SOP1__S_ABS_I32 - - // --- description from .arch file --- - // if(S.i < 0) then D.i = -S.i; - // else D.i = S.i; - // SCC = 1 if result is non-zero. - // Integer absolute value. - void - Inst_SOP1__S_ABS_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); - ScalarOperandI32 sdst(gpuDynInst, instData.SDST); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src.read(); - - sdst = std::abs(src.rawData()); - - scc = sdst.rawData() ? 1 : 0; - - sdst.write(); - scc.write(); - } // execute - // --- Inst_SOP1__S_MOV_FED_B32 class methods --- - - Inst_SOP1__S_MOV_FED_B32::Inst_SOP1__S_MOV_FED_B32(InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_mov_fed_b32") - { - setFlag(ALU); - } // Inst_SOP1__S_MOV_FED_B32 - - Inst_SOP1__S_MOV_FED_B32::~Inst_SOP1__S_MOV_FED_B32() - { - } // ~Inst_SOP1__S_MOV_FED_B32 - - // --- description from .arch file --- - // D.u = S0.u. Introduce an EDC double-detect error on write to the - // destination SGPR. - void - Inst_SOP1__S_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOP1__S_SET_GPR_IDX_IDX class methods --- - - Inst_SOP1__S_SET_GPR_IDX_IDX::Inst_SOP1__S_SET_GPR_IDX_IDX( - InFmt_SOP1 *iFmt) - : Inst_SOP1(iFmt, "s_set_gpr_idx_idx") - { - } // Inst_SOP1__S_SET_GPR_IDX_IDX - - Inst_SOP1__S_SET_GPR_IDX_IDX::~Inst_SOP1__S_SET_GPR_IDX_IDX() - { - } // ~Inst_SOP1__S_SET_GPR_IDX_IDX - - // --- description from .arch file --- - // M0[7:0] = S0.u[7:0]. - // Modify the index used in vector GPR indexing. 
- void - Inst_SOP1__S_SET_GPR_IDX_IDX::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_SOPC__S_CMP_EQ_I32 class methods --- - - Inst_SOPC__S_CMP_EQ_I32::Inst_SOPC__S_CMP_EQ_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_eq_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_EQ_I32 - - Inst_SOPC__S_CMP_EQ_I32::~Inst_SOPC__S_CMP_EQ_I32() - { - } // ~Inst_SOPC__S_CMP_EQ_I32 - - // --- description from .arch file --- - // SCC = (S0.i == S1.i). - void - Inst_SOPC__S_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() == src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_LG_I32 class methods --- - - Inst_SOPC__S_CMP_LG_I32::Inst_SOPC__S_CMP_LG_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_lg_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LG_I32 - - Inst_SOPC__S_CMP_LG_I32::~Inst_SOPC__S_CMP_LG_I32() - { - } // ~Inst_SOPC__S_CMP_LG_I32 - - // --- description from .arch file --- - // SCC = (S0.i != S1.i). - void - Inst_SOPC__S_CMP_LG_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() != src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_GT_I32 class methods --- - - Inst_SOPC__S_CMP_GT_I32::Inst_SOPC__S_CMP_GT_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_gt_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_GT_I32 - - Inst_SOPC__S_CMP_GT_I32::~Inst_SOPC__S_CMP_GT_I32() - { - } // ~Inst_SOPC__S_CMP_GT_I32 - - // --- description from .arch file --- - // SCC = (S0.i > S1.i). 
- void - Inst_SOPC__S_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() > src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_GE_I32 class methods --- - - Inst_SOPC__S_CMP_GE_I32::Inst_SOPC__S_CMP_GE_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_ge_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_GE_I32 - - Inst_SOPC__S_CMP_GE_I32::~Inst_SOPC__S_CMP_GE_I32() - { - } // ~Inst_SOPC__S_CMP_GE_I32 - - // --- description from .arch file --- - // SCC = (S0.i >= S1.i). - void - Inst_SOPC__S_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() >= src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_LT_I32 class methods --- - - Inst_SOPC__S_CMP_LT_I32::Inst_SOPC__S_CMP_LT_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_lt_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LT_I32 - - Inst_SOPC__S_CMP_LT_I32::~Inst_SOPC__S_CMP_LT_I32() - { - } // ~Inst_SOPC__S_CMP_LT_I32 - - // --- description from .arch file --- - // SCC = (S0.i < S1.i). - void - Inst_SOPC__S_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() < src1.rawData()) ? 
1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_LE_I32 class methods --- - - Inst_SOPC__S_CMP_LE_I32::Inst_SOPC__S_CMP_LE_I32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_le_i32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LE_I32 - - Inst_SOPC__S_CMP_LE_I32::~Inst_SOPC__S_CMP_LE_I32() - { - } // ~Inst_SOPC__S_CMP_LE_I32 - - // --- description from .arch file --- - // SCC = (S0.i <= S1.i). - void - Inst_SOPC__S_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() <= src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_EQ_U32 class methods --- - - Inst_SOPC__S_CMP_EQ_U32::Inst_SOPC__S_CMP_EQ_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_eq_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_EQ_U32 - - Inst_SOPC__S_CMP_EQ_U32::~Inst_SOPC__S_CMP_EQ_U32() - { - } // ~Inst_SOPC__S_CMP_EQ_U32 - - // --- description from .arch file --- - // SCC = (S0.u == S1.u). - void - Inst_SOPC__S_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() == src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_LG_U32 class methods --- - - Inst_SOPC__S_CMP_LG_U32::Inst_SOPC__S_CMP_LG_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_lg_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LG_U32 - - Inst_SOPC__S_CMP_LG_U32::~Inst_SOPC__S_CMP_LG_U32() - { - } // ~Inst_SOPC__S_CMP_LG_U32 - - // --- description from .arch file --- - // SCC = (S0.u != S1.u). 
- void - Inst_SOPC__S_CMP_LG_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() != src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_GT_U32 class methods --- - - Inst_SOPC__S_CMP_GT_U32::Inst_SOPC__S_CMP_GT_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_gt_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_GT_U32 - - Inst_SOPC__S_CMP_GT_U32::~Inst_SOPC__S_CMP_GT_U32() - { - } // ~Inst_SOPC__S_CMP_GT_U32 - - // --- description from .arch file --- - // SCC = (S0.u > S1.u). - void - Inst_SOPC__S_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() > src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_GE_U32 class methods --- - - Inst_SOPC__S_CMP_GE_U32::Inst_SOPC__S_CMP_GE_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_ge_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_GE_U32 - - Inst_SOPC__S_CMP_GE_U32::~Inst_SOPC__S_CMP_GE_U32() - { - } // ~Inst_SOPC__S_CMP_GE_U32 - - // --- description from .arch file --- - // SCC = (S0.u >= S1.u). - void - Inst_SOPC__S_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() >= src1.rawData()) ? 
1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_LT_U32 class methods --- - - Inst_SOPC__S_CMP_LT_U32::Inst_SOPC__S_CMP_LT_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_lt_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LT_U32 - - Inst_SOPC__S_CMP_LT_U32::~Inst_SOPC__S_CMP_LT_U32() - { - } // ~Inst_SOPC__S_CMP_LT_U32 - - // --- description from .arch file --- - // SCC = (S0.u < S1.u). - void - Inst_SOPC__S_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() < src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_CMP_LE_U32 class methods --- - - Inst_SOPC__S_CMP_LE_U32::Inst_SOPC__S_CMP_LE_U32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_cmp_le_u32") - { - setFlag(ALU); - } // Inst_SOPC__S_CMP_LE_U32 - - Inst_SOPC__S_CMP_LE_U32::~Inst_SOPC__S_CMP_LE_U32() - { - } // ~Inst_SOPC__S_CMP_LE_U32 - - // --- description from .arch file --- - // SCC = (S0.u <= S1.u). - void - Inst_SOPC__S_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) - { - ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); - ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); - ScalarOperandU32 scc(gpuDynInst, REG_SCC); - - src0.read(); - src1.read(); - - scc = (src0.rawData() <= src1.rawData()) ? 1 : 0; - - scc.write(); - } // execute - // --- Inst_SOPC__S_BITCMP0_B32 class methods --- - - Inst_SOPC__S_BITCMP0_B32::Inst_SOPC__S_BITCMP0_B32(InFmt_SOPC *iFmt) - : Inst_SOPC(iFmt, "s_bitcmp0_b32") - { - setFlag(ALU); - } // Inst_SOPC__S_BITCMP0_B32 - - Inst_SOPC__S_BITCMP0_B32::~Inst_SOPC__S_BITCMP0_B32() - { - } // ~Inst_SOPC__S_BITCMP0_B32 - - // --- description from .arch file --- - // SCC = (S0.u[S1.u[4:0]] == 0). 
    void
    Inst_SOPC__S_BITCMP0_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        // SCC <- 1 iff the bit of src0 selected by src1[4:0] is clear.
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = !bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0;

        scc.write();
    } // execute
    // --- Inst_SOPC__S_BITCMP1_B32 class methods ---

    Inst_SOPC__S_BITCMP1_B32::Inst_SOPC__S_BITCMP1_B32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_bitcmp1_b32")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_BITCMP1_B32

    Inst_SOPC__S_BITCMP1_B32::~Inst_SOPC__S_BITCMP1_B32()
    {
    } // ~Inst_SOPC__S_BITCMP1_B32

    // --- description from .arch file ---
    // SCC = (S0.u[S1.u[4:0]] == 1).
    void
    Inst_SOPC__S_BITCMP1_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        // SCC <- 1 iff the bit of src0 selected by src1[4:0] is set.
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0;

        scc.write();
    } // execute
    // --- Inst_SOPC__S_BITCMP0_B64 class methods ---

    Inst_SOPC__S_BITCMP0_B64::Inst_SOPC__S_BITCMP0_B64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_bitcmp0_b64")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_BITCMP0_B64

    Inst_SOPC__S_BITCMP0_B64::~Inst_SOPC__S_BITCMP0_B64()
    {
    } // ~Inst_SOPC__S_BITCMP0_B64

    // --- description from .arch file ---
    // SCC = (S0.u64[S1.u[5:0]] == 0).
    void
    Inst_SOPC__S_BITCMP0_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // 64-bit variant: the bit index comes from src1[5:0].
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = !bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0;

        scc.write();
    } // execute
    // --- Inst_SOPC__S_BITCMP1_B64 class methods ---

    Inst_SOPC__S_BITCMP1_B64::Inst_SOPC__S_BITCMP1_B64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_bitcmp1_b64")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_BITCMP1_B64

    Inst_SOPC__S_BITCMP1_B64::~Inst_SOPC__S_BITCMP1_B64()
    {
    } // ~Inst_SOPC__S_BITCMP1_B64

    // --- description from .arch file ---
    // SCC = (S0.u64[S1.u[5:0]] == 1).
    void
    Inst_SOPC__S_BITCMP1_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // 64-bit variant: the bit index comes from src1[5:0].
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0;

        scc.write();
    } // execute
    // --- Inst_SOPC__S_SETVSKIP class methods ---

    Inst_SOPC__S_SETVSKIP::Inst_SOPC__S_SETVSKIP(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_setvskip")
    {
    } // Inst_SOPC__S_SETVSKIP

    Inst_SOPC__S_SETVSKIP::~Inst_SOPC__S_SETVSKIP()
    {
    } // ~Inst_SOPC__S_SETVSKIP

    // --- description from .arch file ---
    // VSKIP = S0.u[S1.u[4:0]].
    // Enables and disables VSKIP mode.
    // When VSKIP is enabled, no VOP*/M*BUF/MIMG/DS/FLAT/EXP instuctions are
    // issued.
    // If any vector operations are outstanding, S_WAITCNT must be issued
    // before executing.
    // This instruction requires one waitstate after executing (e.g. S_NOP 0).
    // Example:
    // s_waitcnt 0
    // s_setvskip 1, 0 // Enable vskip mode.
    // s_nop 1
    void
    Inst_SOPC__S_SETVSKIP::execute(GPUDynInstPtr gpuDynInst)
    {
        // VSKIP mode is not modeled.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPC__S_SET_GPR_IDX_ON class methods ---

    Inst_SOPC__S_SET_GPR_IDX_ON::Inst_SOPC__S_SET_GPR_IDX_ON(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_set_gpr_idx_on")
    {
    } // Inst_SOPC__S_SET_GPR_IDX_ON

    Inst_SOPC__S_SET_GPR_IDX_ON::~Inst_SOPC__S_SET_GPR_IDX_ON()
    {
    } // ~Inst_SOPC__S_SET_GPR_IDX_ON

    // --- description from .arch file ---
    // MODE.gpr_idx_en = 1;
    // M0[7:0] = S0.u[7:0];
    // M0[15:12] = SIMM4 (direct contents of S1 field);
    // // Remaining bits of M0 are unmodified.
    // Enable GPR indexing mode. Vector operations after this will perform
    // relative GPR addressing based on the contents of M0. The structure
    // SQ_M0_GPR_IDX_WORD may be used to decode M0.
    // The raw contents of the S1 field are read and used to set the enable
    // bits. S1[0] = VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and
    // S1[3] = VDST_REL.
    void
    Inst_SOPC__S_SET_GPR_IDX_ON::execute(GPUDynInstPtr gpuDynInst)
    {
        // GPR-indexing mode is not modeled.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPC__S_CMP_EQ_U64 class methods ---

    Inst_SOPC__S_CMP_EQ_U64::Inst_SOPC__S_CMP_EQ_U64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_eq_u64")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_EQ_U64

    Inst_SOPC__S_CMP_EQ_U64::~Inst_SOPC__S_CMP_EQ_U64()
    {
    } // ~Inst_SOPC__S_CMP_EQ_U64

    // --- description from .arch file ---
    // SCC = (S0.i64 == S1.i64).
    void
    Inst_SOPC__S_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        // NOTE(review): signed 64-bit operand classes are used for an
        // unsigned compare; for ==/!= the result is bit-identical, so
        // this is benign.
        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() == src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute
    // --- Inst_SOPC__S_CMP_LG_U64 class methods ---

    Inst_SOPC__S_CMP_LG_U64::Inst_SOPC__S_CMP_LG_U64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lg_u64")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_LG_U64

    Inst_SOPC__S_CMP_LG_U64::~Inst_SOPC__S_CMP_LG_U64()
    {
    } // ~Inst_SOPC__S_CMP_LG_U64

    // --- description from .arch file ---
    // SCC = (S0.i64 != S1.i64).
    void
    Inst_SOPC__S_CMP_LG_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        // NOTE(review): signed operand classes for an unsigned compare;
        // benign for !=.
        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() != src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute
    // --- Inst_SOPP__S_NOP class methods ---

    Inst_SOPP__S_NOP::Inst_SOPP__S_NOP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_nop")
    {
        setFlag(Nop);
    } // Inst_SOPP__S_NOP

    Inst_SOPP__S_NOP::~Inst_SOPP__S_NOP()
    {
    } // ~Inst_SOPP__S_NOP

    // --- description from .arch file ---
    // Do nothing. Repeat NOP 1..8 times based on SIMM16[2:0] -- 0 = 1 time,
    // 7 = 8 times.
    // This instruction may be used to introduce wait states to resolve
    // hazards; see the shader programming guide for details. Compare with
    // S_SLEEP.
    void
    Inst_SOPP__S_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
        // Intentionally empty: timing-model wait states are not simulated,
        // so the repeat count in SIMM16[2:0] is ignored.
    } // execute
    // --- Inst_SOPP__S_ENDPGM class methods ---

    Inst_SOPP__S_ENDPGM::Inst_SOPP__S_ENDPGM(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_endpgm")
    {
        setFlag(EndOfKernel);
    } // Inst_SOPP__S_ENDPGM

    Inst_SOPP__S_ENDPGM::~Inst_SOPP__S_ENDPGM()
    {
    } // ~Inst_SOPP__S_ENDPGM

    // --- description from .arch file ---
    // End of program; terminate wavefront.
    // The hardware implicitly executes S_WAITCNT 0 before executing this
    // --- instruction.
    // See S_ENDPGM_SAVED for the context-switch version of this instruction.
    void
    Inst_SOPP__S_ENDPGM::execute(GPUDynInstPtr gpuDynInst)
    {
        // Terminates this wavefront: flush fetch state, release barrier
        // membership, free registers, and -- if this is the last WF of the
        // last WG in the kernel and end-of-kernel release is enabled --
        // inject a global memory sync before retiring the workgroup.
        Wavefront *wf = gpuDynInst->wavefront();
        ComputeUnit *cu = gpuDynInst->computeUnit();

        // delete extra instructions fetched for completed work-items
        wf->instructionBuffer.erase(wf->instructionBuffer.begin() + 1,
            wf->instructionBuffer.end());

        if (wf->pendingFetch) {
            wf->dropFetch = true;
        }

        wf->computeUnit->fetchStage.fetchUnit(wf->simdId)
            .flushBuf(wf->wfSlotId);
        wf->setStatus(Wavefront::S_STOPPED);

        int refCount = wf->computeUnit->getLds()
            .decreaseRefCounter(wf->dispatchId, wf->wgId);

        /**
         * The parent WF of this instruction is exiting, therefore
         * it should not participate in this barrier any longer. This
         * prevents possible deadlock issues if WFs exit early.
         */
        int bar_id = WFBarrier::InvalidID;
        if (wf->hasBarrier()) {
            assert(wf->getStatus() != Wavefront::S_BARRIER);
            bar_id = wf->barrierId();
            assert(bar_id != WFBarrier::InvalidID);
            wf->releaseBarrier();
            cu->decMaxBarrierCnt(bar_id);
            DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Exiting the "
                    "program and decrementing max barrier count for "
                    "barrier Id%d. New max count: %d.\n", cu->cu_id,
                    wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id,
                    cu->maxBarrierCnt(bar_id));
        }

        DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
                wf->computeUnit->cu_id, wf->wgId, refCount);

        wf->computeUnit->registerManager->freeRegisters(wf);
        wf->computeUnit->stats.completedWfs++;
        wf->computeUnit->activeWaves--;

        panic_if(wf->computeUnit->activeWaves < 0, "CU[%d] Active waves less "
            "than zero\n", wf->computeUnit->cu_id);

        DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
            wf->computeUnit->cu_id, wf->simdId, wf->wfSlotId, wf->wfDynId);

        // Record RAW-distance statistics accumulated during execution.
        for (int i = 0; i < wf->vecReads.size(); i++) {
            if (wf->rawDist.find(i) != wf->rawDist.end()) {
                wf->stats.readsPerWrite.sample(wf->vecReads.at(i));
            }
        }
        wf->vecReads.clear();
        wf->rawDist.clear();
        wf->lastInstExec = 0;

        // refCount == 0 means this was the last WF of its workgroup.
        if (!refCount) {
            /**
             * If all WFs have finished, and hence the WG has finished,
             * then we can free up the barrier belonging to the parent
             * WG, but only if we actually used a barrier (i.e., more
             * than one WF in the WG).
             */
            if (bar_id != WFBarrier::InvalidID) {
                DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves are "
                        "now complete. Releasing barrier Id%d.\n", cu->cu_id,
                        wf->simdId, wf->wfSlotId, wf->wfDynId,
                        wf->barrierId());
                cu->releaseBarrier(bar_id);
            }

            /**
             * Last wavefront of the workgroup has executed return. If the
             * workgroup is not the final one in the kernel, then simply
             * retire it; however, if it is the final one, i.e., indicating
             * the kernel end, then release operation (i.e., GL2 WB) is
             * needed
             */

            //check whether the workgroup is indicating the kernel end, i.e.,
            //the last workgroup in the kernel
            bool kernelEnd =
                wf->computeUnit->shader->dispatcher().isReachingKernelEnd(wf);

            bool relNeeded =
                wf->computeUnit->shader->impl_kern_end_rel;

            //if it is not a kernel end, then retire the workgroup directly
            if (!kernelEnd || !relNeeded) {
                wf->computeUnit->shader->dispatcher().notifyWgCompl(wf);
                wf->setStatus(Wavefront::S_STOPPED);
                wf->computeUnit->stats.completedWGs++;

                return;
            }

            /**
             * if it is a kernel end, inject a memory sync, i.e., GL2 WB, and
             * retire the workgroup after receving response.
             * note that GL0V and GL1 are read only, and they just forward GL2
             * WB request. When forwarding, GL1 send the request to all GL2 in
             * the complex
             */
            setFlag(MemSync);
            setFlag(GlobalSegment);
            // Notify Memory System of Kernel Completion
            // Kernel End = isKernel + isMemSync
            wf->setStatus(Wavefront::S_RETURNING);
            gpuDynInst->simdId = wf->simdId;
            gpuDynInst->wfSlotId = wf->wfSlotId;
            gpuDynInst->wfDynId = wf->wfDynId;

            DPRINTF(GPUExec, "inject global memory fence for CU%d: "
                    "WF[%d][%d][%d]\n", wf->computeUnit->cu_id,
                    wf->simdId, wf->wfSlotId, wf->wfDynId);

            // call shader to prepare the flush operations
            wf->computeUnit->shader->prepareFlush(gpuDynInst);

            wf->computeUnit->stats.completedWGs++;
        } else {
            wf->computeUnit->shader->dispatcher().scheduleDispatch();
        }
    } // execute

    // --- Inst_SOPP__S_BRANCH class methods ---

    Inst_SOPP__S_BRANCH::Inst_SOPP__S_BRANCH(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_branch")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_BRANCH

    Inst_SOPP__S_BRANCH::~Inst_SOPP__S_BRANCH()
    {
    } // ~Inst_SOPP__S_BRANCH

    // --- description from .arch file ---
    // PC = PC + signext(SIMM16 * 4) + 4 (short jump).
    // For a long jump, use S_SETPC.
    void
    Inst_SOPP__S_BRANCH::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unconditional short branch: SIMM16 is a signed word offset,
        // scaled by 4 bytes, relative to the next instruction (+4).
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = gpuDynInst->pc();
        ScalarRegI16 simm16 = instData.SIMM16;

        pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;

        wf->pc(pc);
    } // execute
    // --- Inst_SOPP__S_WAKEUP class methods ---

    Inst_SOPP__S_WAKEUP::Inst_SOPP__S_WAKEUP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_wakeup")
    {
    } // Inst_SOPP__S_WAKEUP

    Inst_SOPP__S_WAKEUP::~Inst_SOPP__S_WAKEUP()
    {
    } // ~Inst_SOPP__S_WAKEUP

    // --- description from .arch file ---
    // Allow a wave to 'ping' all the other waves in its threadgroup to force
    // them to wake up immediately from an S_SLEEP instruction. The ping is
    // ignored if the waves are not sleeping.
    // This allows for more efficient polling on a memory location. The waves
    // which are polling can sit in a long S_SLEEP between memory reads, but
    // the wave which writes the value can tell them all to wake up early now
    // that the data is available. This is useful for fBarrier implementations
    // (speedup).
    // This method is also safe from races because if any wave misses the ping,
    // everything still works fine (whoever missed it just completes their
    // normal S_SLEEP).
    void
    Inst_SOPP__S_WAKEUP::execute(GPUDynInstPtr gpuDynInst)
    {
        // Wave wakeup pings are not modeled.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_CBRANCH_SCC0 class methods ---

    Inst_SOPP__S_CBRANCH_SCC0::Inst_SOPP__S_CBRANCH_SCC0(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_scc0")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_CBRANCH_SCC0

    Inst_SOPP__S_CBRANCH_SCC0::~Inst_SOPP__S_CBRANCH_SCC0()
    {
    } // ~Inst_SOPP__S_CBRANCH_SCC0

    // --- description from .arch file ---
    // if(SCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
- void - Inst_SOPP__S_CBRANCH_SCC0::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - Addr pc = gpuDynInst->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); - - scc.read(); - - if (!scc.rawData()) { - pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; - } - - wf->pc(pc); - } // execute - // --- Inst_SOPP__S_CBRANCH_SCC1 class methods --- - - Inst_SOPP__S_CBRANCH_SCC1::Inst_SOPP__S_CBRANCH_SCC1(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_scc1") - { - setFlag(Branch); - } // Inst_SOPP__S_CBRANCH_SCC1 - - Inst_SOPP__S_CBRANCH_SCC1::~Inst_SOPP__S_CBRANCH_SCC1() - { - } // ~Inst_SOPP__S_CBRANCH_SCC1 - - // --- description from .arch file --- - // if(SCC == 1) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. - void - Inst_SOPP__S_CBRANCH_SCC1::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - Addr pc = gpuDynInst->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); - - scc.read(); - - if (scc.rawData()) { - pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; - } - - wf->pc(pc); - } // execute - // --- Inst_SOPP__S_CBRANCH_VCCZ class methods --- - - Inst_SOPP__S_CBRANCH_VCCZ::Inst_SOPP__S_CBRANCH_VCCZ(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_vccz") - { - setFlag(Branch); - setFlag(ReadsVCC); - } // Inst_SOPP__S_CBRANCH_VCCZ - - Inst_SOPP__S_CBRANCH_VCCZ::~Inst_SOPP__S_CBRANCH_VCCZ() - { - } // ~Inst_SOPP__S_CBRANCH_VCCZ - - // --- description from .arch file --- - // if(VCC == 0) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. 
    void
    Inst_SOPP__S_CBRANCH_VCCZ::execute(GPUDynInstPtr gpuDynInst)
    {
        // Branch when the full 64-bit VCC is zero; VCC is read as one
        // 64-bit operand anchored at REG_VCC_LO.
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
        Addr pc = gpuDynInst->pc();
        ScalarRegI16 simm16 = instData.SIMM16;

        vcc.read();

        if (!vcc.rawData()) {
            pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
        }

        // NOTE(review): unlike the VCCNZ variant below, the PC is written
        // back even on the not-taken path; the two styles appear to be
        // behaviorally equivalent here -- confirm against Wavefront::pc().
        wf->pc(pc);
    } // execute
    // --- Inst_SOPP__S_CBRANCH_VCCNZ class methods ---

    Inst_SOPP__S_CBRANCH_VCCNZ::Inst_SOPP__S_CBRANCH_VCCNZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_vccnz")
    {
        setFlag(Branch);
        setFlag(ReadsVCC);
    } // Inst_SOPP__S_CBRANCH_VCCNZ

    Inst_SOPP__S_CBRANCH_VCCNZ::~Inst_SOPP__S_CBRANCH_VCCNZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_VCCNZ

    // --- description from .arch file ---
    // if(VCC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_VCCNZ::execute(GPUDynInstPtr gpuDynInst)
    {
        // Branch when any bit of the 64-bit VCC is set; the PC is only
        // updated on the taken path.
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        vcc.read();

        if (vcc.rawData()) {
            Addr pc = gpuDynInst->pc();
            ScalarRegI16 simm16 = instData.SIMM16;
            pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
            wf->pc(pc);
        }
    } // execute
    // --- Inst_SOPP__S_CBRANCH_EXECZ class methods ---

    Inst_SOPP__S_CBRANCH_EXECZ::Inst_SOPP__S_CBRANCH_EXECZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_execz")
    {
        setFlag(Branch);
        setFlag(ReadsEXEC);
    } // Inst_SOPP__S_CBRANCH_EXECZ

    Inst_SOPP__S_CBRANCH_EXECZ::~Inst_SOPP__S_CBRANCH_EXECZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_EXECZ

    // --- description from .arch file ---
    // if(EXEC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
- void - Inst_SOPP__S_CBRANCH_EXECZ::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (wf->execMask().none()) { - Addr pc = gpuDynInst->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; - wf->pc(pc); - } - } // execute - // --- Inst_SOPP__S_CBRANCH_EXECNZ class methods --- - - Inst_SOPP__S_CBRANCH_EXECNZ::Inst_SOPP__S_CBRANCH_EXECNZ(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_cbranch_execnz") - { - setFlag(Branch); - setFlag(ReadsEXEC); - } // Inst_SOPP__S_CBRANCH_EXECNZ - - Inst_SOPP__S_CBRANCH_EXECNZ::~Inst_SOPP__S_CBRANCH_EXECNZ() - { - } // ~Inst_SOPP__S_CBRANCH_EXECNZ - - // --- description from .arch file --- - // if(EXEC != 0) then PC = PC + signext(SIMM16 * 4) + 4; - // else NOP. - void - Inst_SOPP__S_CBRANCH_EXECNZ::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (wf->execMask().any()) { - Addr pc = gpuDynInst->pc(); - ScalarRegI16 simm16 = instData.SIMM16; - pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL; - wf->pc(pc); - } - } // execute - // --- Inst_SOPP__S_BARRIER class methods --- - - Inst_SOPP__S_BARRIER::Inst_SOPP__S_BARRIER(InFmt_SOPP *iFmt) - : Inst_SOPP(iFmt, "s_barrier") - { - setFlag(MemBarrier); - } // Inst_SOPP__S_BARRIER - - Inst_SOPP__S_BARRIER::~Inst_SOPP__S_BARRIER() - { - } // ~Inst_SOPP__S_BARRIER - - // --- description from .arch file --- - // Synchronize waves within a threadgroup. - // If not all waves of the threadgroup have been created yet, waits for - // entire group before proceeding. - // If some waves in the threadgroup have already terminated, this waits on - // only the surviving waves. - // Barriers are legal inside trap handlers. 
    void
    Inst_SOPP__S_BARRIER::execute(GPUDynInstPtr gpuDynInst)
    {
        // Registers this wavefront's arrival at its workgroup barrier.
        // The scheduler-side release of stalled waves happens elsewhere;
        // this only increments the arrival count and logs progress.
        Wavefront *wf = gpuDynInst->wavefront();
        ComputeUnit *cu = gpuDynInst->computeUnit();

        if (wf->hasBarrier()) {
            int bar_id = wf->barrierId();
            assert(wf->getStatus() == Wavefront::S_BARRIER);
            cu->incNumAtBarrier(bar_id);
            DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalling at "
                    "barrier Id%d. %d waves now at barrier, %d waves "
                    "remain.\n", cu->cu_id, wf->simdId, wf->wfSlotId,
                    wf->wfDynId, bar_id, cu->numAtBarrier(bar_id),
                    cu->numYetToReachBarrier(bar_id));
        }
    } // execute
    // --- Inst_SOPP__S_SETKILL class methods ---

    Inst_SOPP__S_SETKILL::Inst_SOPP__S_SETKILL(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_setkill")
    {
    } // Inst_SOPP__S_SETKILL

    Inst_SOPP__S_SETKILL::~Inst_SOPP__S_SETKILL()
    {
    } // ~Inst_SOPP__S_SETKILL

    // --- description from .arch file ---
    // set KILL bit to value of SIMM16[0].
    // Used primarily for debugging kill wave host command behavior.
    void
    Inst_SOPP__S_SETKILL::execute(GPUDynInstPtr gpuDynInst)
    {
        // The KILL bit is not modeled.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_WAITCNT class methods ---

    Inst_SOPP__S_WAITCNT::Inst_SOPP__S_WAITCNT(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_waitcnt")
    {
        setFlag(ALU);
        setFlag(Waitcnt);
    } // Inst_SOPP__S_WAITCNT

    Inst_SOPP__S_WAITCNT::~Inst_SOPP__S_WAITCNT()
    {
    } // ~Inst_SOPP__S_WAITCNT

    // --- description from .arch file ---
    // Wait for the counts of outstanding lds, vector-memory and
    // --- export/vmem-write-data to be at or below the specified levels.
    // SIMM16[3:0] = vmcount (vector memory operations),
    // SIMM16[6:4] = export/mem-write-data count,
    // SIMM16[12:8] = LGKM_cnt (scalar-mem/GDS/LDS count).
    void
    Inst_SOPP__S_WAITCNT::execute(GPUDynInstPtr gpuDynInst)
    {
        // Decode the three wait thresholds from the 16-bit immediate:
        // [3:0] vector memory, [6:4] export/write-data, [12:8] LGKM.
        ScalarRegI32 vm_cnt = 0;
        ScalarRegI32 exp_cnt = 0;
        ScalarRegI32 lgkm_cnt = 0;
        vm_cnt = bits(instData.SIMM16, 3, 0);
        exp_cnt = bits(instData.SIMM16, 6, 4);
        lgkm_cnt = bits(instData.SIMM16, 12, 8);
        // Stall the wave until the outstanding-op counters drop to (or
        // below) the requested levels.
        gpuDynInst->wavefront()->setStatus(Wavefront::S_WAITCNT);
        gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt);
    } // execute
    // --- Inst_SOPP__S_SETHALT class methods ---

    Inst_SOPP__S_SETHALT::Inst_SOPP__S_SETHALT(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sethalt")
    {
    } // Inst_SOPP__S_SETHALT

    Inst_SOPP__S_SETHALT::~Inst_SOPP__S_SETHALT()
    {
    } // ~Inst_SOPP__S_SETHALT

    // --- description from .arch file ---
    // Set HALT bit to value of SIMM16[0]; 1 = halt, 0 = resume.
    // The halt flag is ignored while PRIV == 1 (inside trap handlers) but the
    // shader will halt immediately after the handler returns if HALT is still
    // set at that time.
    void
    Inst_SOPP__S_SETHALT::execute(GPUDynInstPtr gpuDynInst)
    {
        // Not modeled by the simulator.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_SLEEP class methods ---

    Inst_SOPP__S_SLEEP::Inst_SOPP__S_SLEEP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sleep")
    {
        setFlag(ALU);
        setFlag(Sleep);
    } // Inst_SOPP__S_SLEEP

    Inst_SOPP__S_SLEEP::~Inst_SOPP__S_SLEEP()
    {
    } // ~Inst_SOPP__S_SLEEP

    // --- description from .arch file ---
    // Cause a wave to sleep for (64 * SIMM16[2:0] + 1..64) clocks.
    // The exact amount of delay is approximate. Compare with S_NOP.
    void
    Inst_SOPP__S_SLEEP::execute(GPUDynInstPtr gpuDynInst)
    {
        // NOTE(review): the .arch description above uses only SIMM16[2:0],
        // but the full 16-bit immediate is used here -- confirm intended.
        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
        gpuDynInst->wavefront()->setStatus(Wavefront::S_STALLED_SLEEP);
        // sleep duration is specified in multiples of 64 cycles
        gpuDynInst->wavefront()->setSleepTime(64 * simm16);
    } // execute
    // --- Inst_SOPP__S_SETPRIO class methods ---

    Inst_SOPP__S_SETPRIO::Inst_SOPP__S_SETPRIO(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_setprio")
    {
        setFlag(ALU);
    } // Inst_SOPP__S_SETPRIO

    Inst_SOPP__S_SETPRIO::~Inst_SOPP__S_SETPRIO()
    {
    } // ~Inst_SOPP__S_SETPRIO

    // --- description from .arch file ---
    // User settable wave priority is set to SIMM16[1:0]. 0 = lowest,
    // 3 = highest.
    // The overall wave priority is {SPIPrio[1:0] + UserPrio[1:0],
    // WaveAge[3:0]}.
    void
    Inst_SOPP__S_SETPRIO::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU16 simm16 = instData.SIMM16;
        ScalarRegU32 userPrio = simm16 & 0x3;

        // Wave priority is not modeled; warn once and continue.
        warn_once("S_SETPRIO ignored -- Requested priority %d\n", userPrio);
    } // execute
    // --- Inst_SOPP__S_SENDMSG class methods ---

    Inst_SOPP__S_SENDMSG::Inst_SOPP__S_SENDMSG(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sendmsg")
    {
    } // Inst_SOPP__S_SENDMSG

    Inst_SOPP__S_SENDMSG::~Inst_SOPP__S_SENDMSG()
    {
    } // ~Inst_SOPP__S_SENDMSG

    // --- description from .arch file ---
    // Send a message upstream to VGT or the interrupt handler.
    // SIMM16[9:0] contains the message type and is documented in the shader
    // --- programming guide.
    void
    Inst_SOPP__S_SENDMSG::execute(GPUDynInstPtr gpuDynInst)
    {
        // Not modeled by the simulator.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_SENDMSGHALT class methods ---

    Inst_SOPP__S_SENDMSGHALT::Inst_SOPP__S_SENDMSGHALT(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sendmsghalt")
    {
    } // Inst_SOPP__S_SENDMSGHALT

    Inst_SOPP__S_SENDMSGHALT::~Inst_SOPP__S_SENDMSGHALT()
    {
    } // ~Inst_SOPP__S_SENDMSGHALT

    // --- description from .arch file ---
    // Send a message and then HALT the wavefront; see S_SENDMSG for details.
    void
    Inst_SOPP__S_SENDMSGHALT::execute(GPUDynInstPtr gpuDynInst)
    {
        // Not modeled by the simulator.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_TRAP class methods ---

    Inst_SOPP__S_TRAP::Inst_SOPP__S_TRAP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_trap")
    {
    } // Inst_SOPP__S_TRAP

    Inst_SOPP__S_TRAP::~Inst_SOPP__S_TRAP()
    {
    } // ~Inst_SOPP__S_TRAP

    // --- description from .arch file ---
    // TrapID = SIMM16[7:0];
    // Wait for all instructions to complete;
    // set {TTMP1, TTMP0} = {3'h0, PCRewind[3:0], HT[0], TrapID[7:0],
    // PC[47:0]};
    // PC = TBA (trap base address);
    // PRIV = 1.
    // Enter the trap handler. This instruction may be generated internally as
    // well in response to a host trap (HT = 1) or an exception.
    // TrapID 0 is reserved for hardware use and should not be used in a
    // shader-generated trap.
    void
    Inst_SOPP__S_TRAP::execute(GPUDynInstPtr gpuDynInst)
    {
        // Trap handlers are not modeled by the simulator.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_ICACHE_INV class methods ---

    Inst_SOPP__S_ICACHE_INV::Inst_SOPP__S_ICACHE_INV(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_icache_inv")
    {
    } // Inst_SOPP__S_ICACHE_INV

    Inst_SOPP__S_ICACHE_INV::~Inst_SOPP__S_ICACHE_INV()
    {
    } // ~Inst_SOPP__S_ICACHE_INV

    // --- description from .arch file ---
    // Invalidate entire L1 instruction cache.
    // You must have 12 separate S_NOP instructions or a jump/branch
    // instruction after this instruction
    // to ensure the SQ instruction buffer is purged.
    void
    Inst_SOPP__S_ICACHE_INV::execute(GPUDynInstPtr gpuDynInst)
    {
        // Not modeled by the simulator.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_INCPERFLEVEL class methods ---

    Inst_SOPP__S_INCPERFLEVEL::Inst_SOPP__S_INCPERFLEVEL(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_incperflevel")
    {
    } // Inst_SOPP__S_INCPERFLEVEL

    Inst_SOPP__S_INCPERFLEVEL::~Inst_SOPP__S_INCPERFLEVEL()
    {
    } // ~Inst_SOPP__S_INCPERFLEVEL

    // --- description from .arch file ---
    // Increment performance counter specified in SIMM16[3:0] by 1.
    void
    Inst_SOPP__S_INCPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
    {
        // Hardware performance counters are not modeled.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_DECPERFLEVEL class methods ---

    Inst_SOPP__S_DECPERFLEVEL::Inst_SOPP__S_DECPERFLEVEL(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_decperflevel")
    {
    } // Inst_SOPP__S_DECPERFLEVEL

    Inst_SOPP__S_DECPERFLEVEL::~Inst_SOPP__S_DECPERFLEVEL()
    {
    } // ~Inst_SOPP__S_DECPERFLEVEL

    // --- description from .arch file ---
    // Decrement performance counter specified in SIMM16[3:0] by 1.
    void
    Inst_SOPP__S_DECPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
    {
        // Hardware performance counters are not modeled.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_TTRACEDATA class methods ---

    Inst_SOPP__S_TTRACEDATA::Inst_SOPP__S_TTRACEDATA(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_ttracedata")
    {
    } // Inst_SOPP__S_TTRACEDATA

    Inst_SOPP__S_TTRACEDATA::~Inst_SOPP__S_TTRACEDATA()
    {
    } // ~Inst_SOPP__S_TTRACEDATA

    // --- description from .arch file ---
    // Send M0 as user data to the thread trace stream.
    void
    Inst_SOPP__S_TTRACEDATA::execute(GPUDynInstPtr gpuDynInst)
    {
        // Thread tracing is not modeled by the simulator.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_CBRANCH_CDBGSYS class methods ---

    Inst_SOPP__S_CBRANCH_CDBGSYS::Inst_SOPP__S_CBRANCH_CDBGSYS(
          InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbgsys")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_CBRANCH_CDBGSYS

    Inst_SOPP__S_CBRANCH_CDBGSYS::~Inst_SOPP__S_CBRANCH_CDBGSYS()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGSYS

    // --- description from .arch file ---
    // if(conditional_debug_system != 0) then PC = PC + signext(SIMM16 * 4)
    // + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_CDBGSYS::execute(GPUDynInstPtr gpuDynInst)
    {
        // Conditional-debug state is not modeled by the simulator.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_CBRANCH_CDBGUSER class methods ---

    Inst_SOPP__S_CBRANCH_CDBGUSER::Inst_SOPP__S_CBRANCH_CDBGUSER(
          InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbguser")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_CBRANCH_CDBGUSER

    Inst_SOPP__S_CBRANCH_CDBGUSER::~Inst_SOPP__S_CBRANCH_CDBGUSER()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGUSER

    // --- description from .arch file ---
    // if(conditional_debug_user != 0) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_CDBGUSER::execute(GPUDynInstPtr gpuDynInst)
    {
        // Conditional-debug state is not modeled by the simulator.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER class methods ---

    Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER(
          InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_or_user")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER

    Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::
    ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER

    // --- description from .arch file ---
    // if(conditional_debug_system || conditional_debug_user) then PC = PC +
    // --- signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::execute(GPUDynInstPtr gpuDynInst)
    {
        // Conditional-debug state is not modeled by the simulator.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER class methods ---

    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_and_user")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER

    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
    ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER

    // --- description from .arch file ---
    // if(conditional_debug_system && conditional_debug_user) then PC = PC +
    // --- signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::execute(GPUDynInstPtr gpuDynInst)
    {
        // Conditional-debug state is not modeled by the simulator.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_ENDPGM_SAVED class methods ---

    Inst_SOPP__S_ENDPGM_SAVED::Inst_SOPP__S_ENDPGM_SAVED(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_endpgm_saved")
    {
    } // Inst_SOPP__S_ENDPGM_SAVED

    Inst_SOPP__S_ENDPGM_SAVED::~Inst_SOPP__S_ENDPGM_SAVED()
    {
    } // ~Inst_SOPP__S_ENDPGM_SAVED

    // --- description from .arch file ---
    // End of program; signal that a wave has been saved by the context-switch
    // trap handler and terminate wavefront.
    // The hardware implicitly executes S_WAITCNT 0 before executing this
    // instruction.
    // Use S_ENDPGM in all cases unless you are executing the context-switch
    // save handler.
    void
    Inst_SOPP__S_ENDPGM_SAVED::execute(GPUDynInstPtr gpuDynInst)
    {
        // Context-switch save handling is not modeled by the simulator.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_SET_GPR_IDX_OFF class methods ---

    Inst_SOPP__S_SET_GPR_IDX_OFF::Inst_SOPP__S_SET_GPR_IDX_OFF(
          InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_set_gpr_idx_off")
    {
    } // Inst_SOPP__S_SET_GPR_IDX_OFF

    Inst_SOPP__S_SET_GPR_IDX_OFF::~Inst_SOPP__S_SET_GPR_IDX_OFF()
    {
    } // ~Inst_SOPP__S_SET_GPR_IDX_OFF

    // --- description from .arch file ---
    // MODE.gpr_idx_en = 0.
    // Clear GPR indexing mode. Vector operations after this will not perform
    // --- relative GPR addressing regardless of the contents of M0. This
    // --- instruction does not modify M0.
    void
    Inst_SOPP__S_SET_GPR_IDX_OFF::execute(GPUDynInstPtr gpuDynInst)
    {
        // GPR indexing mode is not modeled by the simulator.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_SET_GPR_IDX_MODE class methods ---

    Inst_SOPP__S_SET_GPR_IDX_MODE::Inst_SOPP__S_SET_GPR_IDX_MODE(
          InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_set_gpr_idx_mode")
    {
    } // Inst_SOPP__S_SET_GPR_IDX_MODE

    Inst_SOPP__S_SET_GPR_IDX_MODE::~Inst_SOPP__S_SET_GPR_IDX_MODE()
    {
    } // ~Inst_SOPP__S_SET_GPR_IDX_MODE

    // --- description from .arch file ---
    // M0[15:12] = SIMM4.
    // Modify the mode used for vector GPR indexing.
    // The raw contents of the source field are read and used to set the enable
    // bits. SIMM4[0] = VSRC0_REL, SIMM4[1] = VSRC1_REL, SIMM4[2] = VSRC2_REL
    // and SIMM4[3] = VDST_REL.
    void
    Inst_SOPP__S_SET_GPR_IDX_MODE::execute(GPUDynInstPtr gpuDynInst)
    {
        // GPR indexing mode is not modeled by the simulator.
        panicUnimplemented();
    } // execute
    // --- Inst_SMEM__S_LOAD_DWORD class methods ---

    Inst_SMEM__S_LOAD_DWORD::Inst_SMEM__S_LOAD_DWORD(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dword")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_LOAD_DWORD

    Inst_SMEM__S_LOAD_DWORD::~Inst_SMEM__S_LOAD_DWORD()
    {
    } // ~Inst_SMEM__S_LOAD_DWORD

    /**
     * Read 1 dword from scalar data cache. If the offset is specified as an
     * sgpr, the sgpr contains an unsigned byte offset (the 2 LSBs are
     * ignored). If the offset is specified as an immediate 20-bit constant,
     * the constant is an unsigned byte offset.
     */
    void
    Inst_SMEM__S_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        // SBASE encodes an SGPR-pair index, hence the shift by 1.
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        // The byte offset is either a 20-bit immediate or read from an SGPR.
        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<1>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // Write the loaded dword back to the destination SGPR.
        ScalarOperandU32 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_LOAD_DWORDX2 class methods ---

    Inst_SMEM__S_LOAD_DWORDX2::Inst_SMEM__S_LOAD_DWORDX2(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_LOAD_DWORDX2

    Inst_SMEM__S_LOAD_DWORDX2::~Inst_SMEM__S_LOAD_DWORDX2()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX2

    /**
     * Read 2 dwords from scalar data cache. See s_load_dword for details on
     * the offset input.
     */
    void
    Inst_SMEM__S_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        // SBASE encodes an SGPR-pair index, hence the shift by 1.
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        // The byte offset is either a 20-bit immediate or read from an SGPR.
        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // Write the two loaded dwords back to the destination SGPR pair.
        ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_LOAD_DWORDX4 class methods ---

    Inst_SMEM__S_LOAD_DWORDX4::Inst_SMEM__S_LOAD_DWORDX4(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_LOAD_DWORDX4

    Inst_SMEM__S_LOAD_DWORDX4::~Inst_SMEM__S_LOAD_DWORDX4()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX4

    // --- description from .arch file ---
    // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        // SBASE encodes an SGPR-pair index, hence the shift by 1.
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        // The byte offset is either a 20-bit immediate or read from an SGPR.
        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // Write the four loaded dwords back to the destination SGPR quad.
        ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_LOAD_DWORDX8 class methods ---

    Inst_SMEM__S_LOAD_DWORDX8::Inst_SMEM__S_LOAD_DWORDX8(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx8")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_LOAD_DWORDX8

    Inst_SMEM__S_LOAD_DWORDX8::~Inst_SMEM__S_LOAD_DWORDX8()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX8

    // --- description from .arch file ---
    // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        // SBASE encodes an SGPR-pair index, hence the shift by 1.
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        // The byte offset is either a 20-bit immediate or read from an SGPR.
        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<8>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // Write the eight loaded dwords back to the destination SGPRs.
        ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_LOAD_DWORDX16 class methods ---

    Inst_SMEM__S_LOAD_DWORDX16::Inst_SMEM__S_LOAD_DWORDX16(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx16")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_LOAD_DWORDX16

    Inst_SMEM__S_LOAD_DWORDX16::~Inst_SMEM__S_LOAD_DWORDX16()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX16

    // --- description from .arch file ---
    // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        // SBASE encodes an SGPR-pair index, hence the shift by 1.
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        // The byte offset is either a 20-bit immediate or read from an SGPR.
        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // Write the sixteen loaded dwords back to the destination SGPRs.
        ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_BUFFER_LOAD_DWORD class methods ---

    Inst_SMEM__S_BUFFER_LOAD_DWORD::Inst_SMEM__S_BUFFER_LOAD_DWORD(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dword")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORD

    Inst_SMEM__S_BUFFER_LOAD_DWORD::~Inst_SMEM__S_BUFFER_LOAD_DWORD()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORD

    // --- description from .arch file ---
    // Read 1 dword from scalar data cache. See S_LOAD_DWORD for details on the
    // --- offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        // Buffer variants address through a 128-bit resource descriptor.
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        // The byte offset is either a 20-bit immediate or read from an SGPR.
        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<1>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 1 request, size 32
        ScalarOperandU32 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX2 class methods ---

    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::Inst_SMEM__S_BUFFER_LOAD_DWORDX2(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX2

    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::~Inst_SMEM__S_BUFFER_LOAD_DWORDX2()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX2

    // --- description from .arch file ---
    // Read 2 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        // Buffer variants address through a 128-bit resource descriptor.
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        // The byte offset is either a 20-bit immediate or read from an SGPR.
        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // use U64 because 2 requests, each size 32
        ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX4 class methods ---

    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::Inst_SMEM__S_BUFFER_LOAD_DWORDX4(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX4

    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::~Inst_SMEM__S_BUFFER_LOAD_DWORDX4()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX4

    // --- description from .arch file ---
    // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        // Buffer variants address through a 128-bit resource descriptor.
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        // The byte offset is either a 20-bit immediate or read from an SGPR.
        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 4 requests, each size 32
        ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX8 class methods ---

    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::Inst_SMEM__S_BUFFER_LOAD_DWORDX8(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx8")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX8

    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::~Inst_SMEM__S_BUFFER_LOAD_DWORDX8()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX8

    // --- description from .arch file ---
    // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        // Buffer variants address through a 128-bit resource descriptor.
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        // The byte offset is either a 20-bit immediate or read from an SGPR.
        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<8>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 8 requests, each size 32
        ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX16 class methods ---

    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::Inst_SMEM__S_BUFFER_LOAD_DWORDX16(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx16")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX16

    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::~Inst_SMEM__S_BUFFER_LOAD_DWORDX16()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX16

    // --- description from .arch file ---
    // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        // Buffer variants address through a 128-bit resource descriptor.
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        // The byte offset is either a 20-bit immediate or read from an SGPR.
        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 16 requests, each size 32
        ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_STORE_DWORD class methods ---

    Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_store_dword")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_STORE_DWORD

    Inst_SMEM__S_STORE_DWORD::~Inst_SMEM__S_STORE_DWORD()
    {
    } // ~Inst_SMEM__S_STORE_DWORD

    // --- description from .arch file ---
    // Write 1 dword to scalar data cache.
    // If the offset is specified as an SGPR, the SGPR contains an unsigned
    // BYTE offset (the 2 LSBs are ignored).
    // If the offset is specified as an immediate 20-bit constant, the
    // constant is an unsigned BYTE offset.
    void
    Inst_SMEM__S_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        // SBASE encodes an SGPR-pair index, hence the shift by 1.
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
        ConstScalarOperandU32 sdata(gpuDynInst, instData.SDATA);

        addr.read();
        sdata.read();

        // Stage the dword to be written into the instruction's data buffer.
        std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
                    sizeof(ScalarRegU32));

        // The byte offset is either a 20-bit immediate or read from an SGPR.
        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<1>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // Stores write no registers back; nothing to do on completion.
    } // completeAcc
    // --- Inst_SMEM__S_STORE_DWORDX2 class methods ---

    Inst_SMEM__S_STORE_DWORDX2::Inst_SMEM__S_STORE_DWORDX2(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_STORE_DWORDX2

    Inst_SMEM__S_STORE_DWORDX2::~Inst_SMEM__S_STORE_DWORDX2()
    {
    } // ~Inst_SMEM__S_STORE_DWORDX2

    // --- description from .arch file ---
    // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        // SBASE encodes an SGPR-pair index, hence the shift by 1.
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
        ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA);

        addr.read();
        sdata.read();

        // Stage the two dwords to be written into the data buffer.
        std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
                    sizeof(ScalarRegU64));

        // The byte offset is either a 20-bit immediate or read from an SGPR.
        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // Stores write no registers back; nothing to do on completion.
    } // completeAcc
    // --- Inst_SMEM__S_STORE_DWORDX4 class methods ---

    Inst_SMEM__S_STORE_DWORDX4::Inst_SMEM__S_STORE_DWORDX4(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_STORE_DWORDX4

    Inst_SMEM__S_STORE_DWORDX4::~Inst_SMEM__S_STORE_DWORDX4()
    {
    } // ~Inst_SMEM__S_STORE_DWORDX4

    // --- description from .arch file ---
    // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
- void - Inst_SMEM__S_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - ScalarRegU32 offset(0); - ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); - ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA); - - addr.read(); - sdata.read(); - - std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), - sizeof(gpuDynInst->scalar_data)); - - if (instData.IMM) { - offset = extData.OFFSET; - } else { - ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); - off_sgpr.read(); - offset = off_sgpr.rawData(); - } - - calcAddr(gpuDynInst, addr, offset); - - gpuDynInst->computeUnit()->scalarMemoryPipe. - issueRequest(gpuDynInst); - } // execute - - void - Inst_SMEM__S_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<4>(gpuDynInst); - } // initiateAcc - - void - Inst_SMEM__S_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_SMEM__S_BUFFER_STORE_DWORD class methods --- - - Inst_SMEM__S_BUFFER_STORE_DWORD::Inst_SMEM__S_BUFFER_STORE_DWORD( - InFmt_SMEM *iFmt) - : Inst_SMEM(iFmt, "s_buffer_store_dword") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_SMEM__S_BUFFER_STORE_DWORD - - Inst_SMEM__S_BUFFER_STORE_DWORD::~Inst_SMEM__S_BUFFER_STORE_DWORD() - { - } // ~Inst_SMEM__S_BUFFER_STORE_DWORD - - // --- description from .arch file --- - // Write 1 dword to scalar data cache. See S_STORE_DWORD for details on the - // --- offset input. 
    // Not implemented in the timing model; aborts simulation if decoded.
    void
    Inst_SMEM__S_BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_SMEM__S_BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Unreachable: execute() panics before any access is initiated.
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // Unreachable: execute() panics before any access is initiated.
    } // completeAcc
    // --- Inst_SMEM__S_BUFFER_STORE_DWORDX2 class methods ---

    Inst_SMEM__S_BUFFER_STORE_DWORDX2::Inst_SMEM__S_BUFFER_STORE_DWORDX2(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_BUFFER_STORE_DWORDX2

    Inst_SMEM__S_BUFFER_STORE_DWORDX2::~Inst_SMEM__S_BUFFER_STORE_DWORDX2()
    {
    } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX2

    // --- description from .arch file ---
    // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    // Not implemented in the timing model; aborts simulation if decoded.
    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Unreachable: execute() panics before any access is initiated.
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // Unreachable: execute() panics before any access is initiated.
    } // completeAcc
    // --- Inst_SMEM__S_BUFFER_STORE_DWORDX4 class methods ---

    Inst_SMEM__S_BUFFER_STORE_DWORDX4::Inst_SMEM__S_BUFFER_STORE_DWORDX4(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_BUFFER_STORE_DWORDX4

    Inst_SMEM__S_BUFFER_STORE_DWORDX4::~Inst_SMEM__S_BUFFER_STORE_DWORDX4()
    {
    } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX4

    // --- description from .arch file ---
    // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    // Not implemented in the timing model; aborts simulation if decoded.
    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Unreachable: execute() panics before any access is initiated.
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // Unreachable: execute() panics before any access is initiated.
    } // completeAcc
    // --- Inst_SMEM__S_DCACHE_INV class methods ---

    Inst_SMEM__S_DCACHE_INV::Inst_SMEM__S_DCACHE_INV(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_inv")
    {
    } // Inst_SMEM__S_DCACHE_INV

    Inst_SMEM__S_DCACHE_INV::~Inst_SMEM__S_DCACHE_INV()
    {
    } // ~Inst_SMEM__S_DCACHE_INV

    // --- description from .arch file ---
    // Invalidate the scalar data cache.
    // Not implemented in the timing model; aborts simulation if decoded.
    void
    Inst_SMEM__S_DCACHE_INV::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_SMEM__S_DCACHE_WB class methods ---

    Inst_SMEM__S_DCACHE_WB::Inst_SMEM__S_DCACHE_WB(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_wb")
    {
    } // Inst_SMEM__S_DCACHE_WB

    Inst_SMEM__S_DCACHE_WB::~Inst_SMEM__S_DCACHE_WB()
    {
    } // ~Inst_SMEM__S_DCACHE_WB

    // --- description from .arch file ---
    // Write back dirty data in the scalar data cache.
    // Not implemented in the timing model; aborts simulation if decoded.
    void
    Inst_SMEM__S_DCACHE_WB::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_SMEM__S_DCACHE_INV_VOL class methods ---

    Inst_SMEM__S_DCACHE_INV_VOL::Inst_SMEM__S_DCACHE_INV_VOL(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_inv_vol")
    {
    } // Inst_SMEM__S_DCACHE_INV_VOL

    Inst_SMEM__S_DCACHE_INV_VOL::~Inst_SMEM__S_DCACHE_INV_VOL()
    {
    } // ~Inst_SMEM__S_DCACHE_INV_VOL

    // --- description from .arch file ---
    // Invalidate the scalar data cache volatile lines.
    // Not implemented in the timing model; aborts simulation if decoded.
    void
    Inst_SMEM__S_DCACHE_INV_VOL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_SMEM__S_DCACHE_WB_VOL class methods ---

    Inst_SMEM__S_DCACHE_WB_VOL::Inst_SMEM__S_DCACHE_WB_VOL(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_wb_vol")
    {
    } // Inst_SMEM__S_DCACHE_WB_VOL

    Inst_SMEM__S_DCACHE_WB_VOL::~Inst_SMEM__S_DCACHE_WB_VOL()
    {
    } // ~Inst_SMEM__S_DCACHE_WB_VOL

    // --- description from .arch file ---
    // Write back dirty data in the scalar data cache volatile lines.
    // Not implemented in the timing model; aborts simulation if decoded.
    void
    Inst_SMEM__S_DCACHE_WB_VOL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_SMEM__S_MEMTIME class methods ---

    Inst_SMEM__S_MEMTIME::Inst_SMEM__S_MEMTIME(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_memtime")
    {
        // s_memtime does not issue a memory request
        setFlag(ALU);
    } // Inst_SMEM__S_MEMTIME

    Inst_SMEM__S_MEMTIME::~Inst_SMEM__S_MEMTIME()
    {
    } // ~Inst_SMEM__S_MEMTIME

    // --- description from .arch file ---
    // Return current 64-bit timestamp.
    void
    Inst_SMEM__S_MEMTIME::execute(GPUDynInstPtr gpuDynInst)
    {
        // The compute unit's current cycle count serves as the timestamp.
        ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
        sdst = (ScalarRegU64)gpuDynInst->computeUnit()->curCycle();
        sdst.write();
    } // execute
    // --- Inst_SMEM__S_MEMREALTIME class methods ---

    Inst_SMEM__S_MEMREALTIME::Inst_SMEM__S_MEMREALTIME(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_memrealtime")
    {
    } // Inst_SMEM__S_MEMREALTIME

    Inst_SMEM__S_MEMREALTIME::~Inst_SMEM__S_MEMREALTIME()
    {
    } // ~Inst_SMEM__S_MEMREALTIME

    // --- description from .arch file ---
    // Return current 64-bit RTC.
    // Not implemented in the timing model; aborts simulation if decoded.
    void
    Inst_SMEM__S_MEMREALTIME::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_SMEM__S_ATC_PROBE class methods ---

    Inst_SMEM__S_ATC_PROBE::Inst_SMEM__S_ATC_PROBE(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_atc_probe")
    {
    } // Inst_SMEM__S_ATC_PROBE

    Inst_SMEM__S_ATC_PROBE::~Inst_SMEM__S_ATC_PROBE()
    {
    } // ~Inst_SMEM__S_ATC_PROBE

    // --- description from .arch file ---
    // Probe or prefetch an address into the SQC data cache.
    // Not implemented in the timing model; aborts simulation if decoded.
    void
    Inst_SMEM__S_ATC_PROBE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_SMEM__S_ATC_PROBE_BUFFER class methods ---

    Inst_SMEM__S_ATC_PROBE_BUFFER::Inst_SMEM__S_ATC_PROBE_BUFFER(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_atc_probe_buffer")
    {
    } // Inst_SMEM__S_ATC_PROBE_BUFFER

    Inst_SMEM__S_ATC_PROBE_BUFFER::~Inst_SMEM__S_ATC_PROBE_BUFFER()
    {
    } // ~Inst_SMEM__S_ATC_PROBE_BUFFER

    // --- description from .arch file ---
    // Probe or prefetch an address into the SQC data cache.
    // Not implemented in the timing model; aborts simulation if decoded.
    void
    Inst_SMEM__S_ATC_PROBE_BUFFER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP2__V_CNDMASK_B32 class methods ---

    Inst_VOP2__V_CNDMASK_B32::Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_cndmask_b32")
    {
        setFlag(ALU);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_CNDMASK_B32

    Inst_VOP2__V_CNDMASK_B32::~Inst_VOP2__V_CNDMASK_B32()
    {
    } // ~Inst_VOP2__V_CNDMASK_B32

    // --- description from .arch file ---
    // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
    // as a scalar GPR in S2.
    // Per-lane select between SRC0 and VSRC1 driven by the corresponding
    // VCC bit.
    void
    Inst_VOP2__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Lane i of the select condition is bit i of VCC.
                vdst[lane]
                    = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_ADD_F32 class methods ---

    Inst_VOP2__V_ADD_F32::Inst_VOP2__V_ADD_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_ADD_F32

    Inst_VOP2__V_ADD_F32::~Inst_VOP2__V_ADD_F32()
    {
    } // ~Inst_VOP2__V_ADD_F32

    // --- description from .arch file ---
    // D.f = S0.f + S1.f.
    void
    Inst_VOP2__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        VecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isDPPInst()) {
            // DPP encoding: SRC0 is re-read from the DPP extension word and
            // the lanes are permuted by processDPP() before the add.
            VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src0_dpp.read();

            DPRINTF(VEGA, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BC,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_dpp[lane] + src1[lane];
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] + src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_SUB_F32 class methods ---

    Inst_VOP2__V_SUB_F32::Inst_VOP2__V_SUB_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_SUB_F32

    Inst_VOP2__V_SUB_F32::~Inst_VOP2__V_SUB_F32()
    {
    } // ~Inst_VOP2__V_SUB_F32

    // --- description from .arch file ---
    // D.f = S0.f - S1.f.
    // SQ translates to V_ADD_F32.
- void - Inst_VOP2__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_SUBREV_F32 class methods --- - - Inst_VOP2__V_SUBREV_F32::Inst_VOP2__V_SUBREV_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subrev_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP2__V_SUBREV_F32 - - Inst_VOP2__V_SUBREV_F32::~Inst_VOP2__V_SUBREV_F32() - { - } // ~Inst_VOP2__V_SUBREV_F32 - - // --- description from .arch file --- - // D.f = S1.f - S0.f. - // SQ translates to V_ADD_F32. - void - Inst_VOP2__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MUL_LEGACY_F32 class methods --- - - Inst_VOP2__V_MUL_LEGACY_F32::Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_legacy_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP2__V_MUL_LEGACY_F32 - - Inst_VOP2__V_MUL_LEGACY_F32::~Inst_VOP2__V_MUL_LEGACY_F32() - { - } // ~Inst_VOP2__V_MUL_LEGACY_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f (DX9 rules, 0.0*x = 0.0). 
    // Per-lane single-precision multiply. NOTE(review): despite the DX9
    // "0.0*x = 0.0" description above, this implementation performs a plain
    // IEEE multiply — the legacy special cases are handled in V_MUL_F32
    // below, not here; confirm against the hardware spec.
    void
    Inst_VOP2__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MUL_F32 class methods ---

    Inst_VOP2__V_MUL_F32::Inst_VOP2__V_MUL_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MUL_F32

    Inst_VOP2__V_MUL_F32::~Inst_VOP2__V_MUL_F32()
    {
    } // ~Inst_VOP2__V_MUL_F32

    // --- description from .arch file ---
    // D.f = S0.f * S1.f.
    // Per-lane multiply with explicit handling of the zero/subnormal and
    // infinity operand combinations (0 * inf -> NaN, signed zero/infinity
    // results); only the fully-finite case falls through to the plain
    // IEEE multiply.
    void
    Inst_VOP2__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    // NaN propagates unconditionally.
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    // src0 is +0 (subnormals are flushed to zero here).
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    // src0 is -0: result zero takes the opposite of
                    // src1's sign.
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    // src0 is +inf: inf * 0 -> NaN, else signed infinity.
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    // src0 is -inf: sign of the infinity result flips
                    // with src1's sign.
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    // Both operands finite and nonzero: plain multiply.
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MUL_I32_I24 class methods ---

    Inst_VOP2__V_MUL_I32_I24::Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_i32_i24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_I32_I24

    Inst_VOP2__V_MUL_I32_I24::~Inst_VOP2__V_MUL_I32_I24()
    {
    } // ~Inst_VOP2__V_MUL_I32_I24

    // --- description from .arch file ---
    // D.i = S0.i[23:0] * S1.i[23:0].
    // 24-bit signed multiply: each operand's low 24 bits are sign-extended
    // before the product is formed.
    void
    Inst_VOP2__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
                    * sext<24>(bits(src1[lane], 23, 0));
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MUL_HI_I32_I24 class methods ---

    Inst_VOP2__V_MUL_HI_I32_I24::Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_hi_i32_i24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_HI_I32_I24

    Inst_VOP2__V_MUL_HI_I32_I24::~Inst_VOP2__V_MUL_HI_I32_I24()
    {
    } // ~Inst_VOP2__V_MUL_HI_I32_I24

    // --- description from .arch file ---
    // D.i = (S0.i[23:0] * S1.i[23:0])>>32.
    // High half of the 24-bit signed multiply: widen both sign-extended
    // 24-bit operands to 64 bits so the full product exists, then keep
    // bits [63:32].
    void
    Inst_VOP2__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 tmp_src0
                    = (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
                VecElemI64 tmp_src1
                    = (VecElemI64)sext<24>(bits(src1[lane], 23, 0));

                vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MUL_U32_U24 class methods ---

    Inst_VOP2__V_MUL_U32_U24::Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_u32_u24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_U32_U24

    Inst_VOP2__V_MUL_U32_U24::~Inst_VOP2__V_MUL_U32_U24()
    {
    } // ~Inst_VOP2__V_MUL_U32_U24

    // --- description from .arch file ---
    // D.u = S0.u[23:0] * S1.u[23:0].
    // 24-bit unsigned multiply, expressed as a lambda so vop2Helper can
    // handle the operand read/write and encoding (e.g. SDWA/DPP) plumbing.
    void
    Inst_VOP2__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        auto opImpl = [](VecOperandU32& src0, VecOperandU32& src1,
                         VecOperandU32& vdst, Wavefront* wf) {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = bits(src0[lane], 23, 0) *
                                 bits(src1[lane], 23, 0);
                }
            }
        };

        vop2Helper(gpuDynInst, opImpl);
    } // execute
    // --- Inst_VOP2__V_MUL_HI_U32_U24 class methods ---

    Inst_VOP2__V_MUL_HI_U32_U24::Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_hi_u32_u24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_HI_U32_U24

    Inst_VOP2__V_MUL_HI_U32_U24::~Inst_VOP2__V_MUL_HI_U32_U24()
    {
    } // ~Inst_VOP2__V_MUL_HI_U32_U24

    // --- description from .arch file ---
    // D.i = (S0.u[23:0] * S1.u[23:0])>>32.
- void - Inst_VOP2__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); - VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0); - vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MIN_F32 class methods --- - - Inst_VOP2__V_MIN_F32::Inst_VOP2__V_MIN_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP2__V_MIN_F32 - - Inst_VOP2__V_MIN_F32::~Inst_VOP2__V_MIN_F32() - { - } // ~Inst_VOP2__V_MIN_F32 - - // --- description from .arch file --- - // D.f = (S0.f < S1.f ? S0.f : S1.f). - void - Inst_VOP2__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fmin(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MAX_F32 class methods --- - - Inst_VOP2__V_MAX_F32::Inst_VOP2__V_MAX_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP2__V_MAX_F32 - - Inst_VOP2__V_MAX_F32::~Inst_VOP2__V_MAX_F32() - { - } // ~Inst_VOP2__V_MAX_F32 - - // --- description from .arch file --- - // D.f = (S0.f >= S1.f ? S0.f : S1.f). 
- void - Inst_VOP2__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fmax(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MIN_I32 class methods --- - - Inst_VOP2__V_MIN_I32::Inst_VOP2__V_MIN_I32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_i32") - { - setFlag(ALU); - } // Inst_VOP2__V_MIN_I32 - - Inst_VOP2__V_MIN_I32::~Inst_VOP2__V_MIN_I32() - { - } // ~Inst_VOP2__V_MIN_I32 - - // --- description from .arch file --- - // D.i = min(S0.i, S1.i). - void - Inst_VOP2__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MAX_I32 class methods --- - - Inst_VOP2__V_MAX_I32::Inst_VOP2__V_MAX_I32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_i32") - { - setFlag(ALU); - } // Inst_VOP2__V_MAX_I32 - - Inst_VOP2__V_MAX_I32::~Inst_VOP2__V_MAX_I32() - { - } // ~Inst_VOP2__V_MAX_I32 - - // --- description from .arch file --- - // D.i = max(S0.i, S1.i). 
- void - Inst_VOP2__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MIN_U32 class methods --- - - Inst_VOP2__V_MIN_U32::Inst_VOP2__V_MIN_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_u32") - { - setFlag(ALU); - } // Inst_VOP2__V_MIN_U32 - - Inst_VOP2__V_MIN_U32::~Inst_VOP2__V_MIN_U32() - { - } // ~Inst_VOP2__V_MIN_U32 - - // --- description from .arch file --- - // D.u = min(S0.u, S1.u). - void - Inst_VOP2__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MAX_U32 class methods --- - - Inst_VOP2__V_MAX_U32::Inst_VOP2__V_MAX_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_u32") - { - setFlag(ALU); - } // Inst_VOP2__V_MAX_U32 - - Inst_VOP2__V_MAX_U32::~Inst_VOP2__V_MAX_U32() - { - } // ~Inst_VOP2__V_MAX_U32 - - // --- description from .arch file --- - // D.u = max(S0.u, S1.u). 
- void - Inst_VOP2__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_LSHRREV_B32 class methods --- - - Inst_VOP2__V_LSHRREV_B32::Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_lshrrev_b32") - { - setFlag(ALU); - } // Inst_VOP2__V_LSHRREV_B32 - - Inst_VOP2__V_LSHRREV_B32::~Inst_VOP2__V_LSHRREV_B32() - { - } // ~Inst_VOP2__V_LSHRREV_B32 - - // --- description from .arch file --- - // D.u = S1.u >> S0.u[4:0]. - // The vacated bits are set to zero. - // SQ translates this to an internal SP opcode. - void - Inst_VOP2__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_ASHRREV_I32 class methods --- - - Inst_VOP2__V_ASHRREV_I32::Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_ashrrev_i32") - { - setFlag(ALU); - } // Inst_VOP2__V_ASHRREV_I32 - - Inst_VOP2__V_ASHRREV_I32::~Inst_VOP2__V_ASHRREV_I32() - { - } // ~Inst_VOP2__V_ASHRREV_I32 - - // --- description from .arch file --- - // D.i = signext(S1.i) >> S0.i[4:0]. - // The vacated bits are set to the sign bit of the input value. - // SQ translates this to an internal SP opcode. 
    // Reversed-operand arithmetic shift right; S1 is signed so the shift
    // extends the sign bit, and only the low 5 bits of S0 form the amount.
    void
    Inst_VOP2__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_LSHLREV_B32 class methods ---

    Inst_VOP2__V_LSHLREV_B32::Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshlrev_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHLREV_B32

    Inst_VOP2__V_LSHLREV_B32::~Inst_VOP2__V_LSHLREV_B32()
    {
    } // ~Inst_VOP2__V_LSHLREV_B32

    // --- description from .arch file ---
    // D.u = S1.u << S0.u[4:0].
    // SQ translates this to an internal SP opcode.
    // Reversed-operand logical shift left. The SDWA path keeps pristine
    // copies of the operands so processSDWA_src/_dst can apply the
    // sub-dword selects against unmodified data.
    void
    Inst_VOP2__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and vdst during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(VEGA, "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register "
                    "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: "
                    "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
                    "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_U,
                    extData.iFmt_VOP_SDWA.CLMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0);
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_AND_B32 class methods ---

    Inst_VOP2__V_AND_B32::Inst_VOP2__V_AND_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_and_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_AND_B32

    Inst_VOP2__V_AND_B32::~Inst_VOP2__V_AND_B32()
    {
    } // ~Inst_VOP2__V_AND_B32

    // --- description from .arch file ---
    // D.u = S0.u & S1.u.
    // Input and output modifiers not supported.
    // Per-lane bitwise AND. The DPP path re-reads SRC0 from the DPP
    // extension word and permutes lanes via processDPP() before the op.
    void
    Inst_VOP2__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isDPPInst()) {
            VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src0_dpp.read();

            DPRINTF(VEGA, "Handling V_AND_B32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BC,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_dpp[lane] & src1[lane];
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] & src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_OR_B32 class methods ---

    Inst_VOP2__V_OR_B32::Inst_VOP2__V_OR_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_or_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_OR_B32

    Inst_VOP2__V_OR_B32::~Inst_VOP2__V_OR_B32()
    {
    } // ~Inst_VOP2__V_OR_B32

    // --- description from .arch file ---
    // D.u = S0.u | S1.u.
    // Input and output modifiers not supported.
    // Per-lane bitwise OR with the same SDWA bookkeeping as V_LSHLREV_B32.
    void
    Inst_VOP2__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and dest during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(VEGA, "Handling V_OR_B32 SRC SDWA. SRC0: register v[%d], "
                    "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
                    "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
                    "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_U,
                    extData.iFmt_VOP_SDWA.CLMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_sdwa[lane] | src1[lane];
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] | src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_XOR_B32 class methods ---

    Inst_VOP2__V_XOR_B32::Inst_VOP2__V_XOR_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_xor_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_XOR_B32

    Inst_VOP2__V_XOR_B32::~Inst_VOP2__V_XOR_B32()
    {
    } // ~Inst_VOP2__V_XOR_B32

    // --- description from .arch file ---
    // D.u = S0.u ^ S1.u.
    // Input and output modifiers not supported.
    // Per-lane bitwise XOR.
    void
    Inst_VOP2__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] ^ src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MAC_F32 class methods ---

    Inst_VOP2__V_MAC_F32::Inst_VOP2__V_MAC_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mac_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAC);
    } // Inst_VOP2__V_MAC_F32

    Inst_VOP2__V_MAC_F32::~Inst_VOP2__V_MAC_F32()
    {
    } // ~Inst_VOP2__V_MAC_F32

    // --- description from .arch file ---
    // D.f = S0.f * S1.f + D.f.
    // SQ translates to V_MAD_F32.
    // Multiply-accumulate: the destination is read first because it is
    // also the addend. Uses fused multiply-add per lane.
    void
    Inst_VOP2__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        VecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();
        // vdst doubles as the accumulator input, so pre-read it.
        vdst.read();

        if (isDPPInst()) {
            // DPP encoding: SRC0 is re-read from the DPP extension word
            // and permuted by processDPP() before the fma.
            VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src0_dpp.read();

            DPRINTF(VEGA, "Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BC,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = std::fma(src0_dpp[lane], src1[lane],
                                          vdst[lane]);
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MADMK_F32 class methods ---

    Inst_VOP2__V_MADMK_F32::Inst_VOP2__V_MADMK_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madmk_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP2__V_MADMK_F32

    Inst_VOP2__V_MADMK_F32::~Inst_VOP2__V_MADMK_F32()
    {
    } // ~Inst_VOP2__V_MADMK_F32

    // --- description from .arch file ---
    // D.f = S0.f * K + S1.f; K is a 32-bit inline constant.
    // This opcode cannot use the VOP3 encoding and cannot use input/output
    // --- modifiers.
    // SQ translates to V_MAD_F32.
- void - Inst_VOP2__V_MADMK_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - VecElemF32 k = extData.imm_f32; - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], k, src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MADAK_F32 class methods --- - - Inst_VOP2__V_MADAK_F32::Inst_VOP2__V_MADAK_F32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_madak_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(MAD); - } // Inst_VOP2__V_MADAK_F32 - - Inst_VOP2__V_MADAK_F32::~Inst_VOP2__V_MADAK_F32() - { - } // ~Inst_VOP2__V_MADAK_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f + K; K is a 32-bit inline constant. - // This opcode cannot use the VOP3 encoding and cannot use input/output - // --- modifiers. - // SQ translates to V_MAD_F32. 
- void - Inst_VOP2__V_MADAK_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - VecElemF32 k = extData.imm_f32; - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], k); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_ADD_CO_U32 class methods --- - - Inst_VOP2__V_ADD_CO_U32::Inst_VOP2__V_ADD_CO_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_add_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - } // Inst_VOP2__V_ADD_CO_U32 - - Inst_VOP2__V_ADD_CO_U32::~Inst_VOP2__V_ADD_CO_U32() - { - } // ~Inst_VOP2__V_ADD_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u; - // VCC[threadId] = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an UNSIGNED - // --- overflow or carry-out for V_ADDC_U32. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair. - void - Inst_VOP2__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - VecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - if (isSDWAInst()) { - VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); - // use copies of original src0, src1, and dest during selecting - VecOperandU32 origSrc0_sdwa(gpuDynInst, - extData.iFmt_VOP_SDWA.SRC0); - VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); - VecOperandU32 origVdst(gpuDynInst, instData.VDST); - - src0_sdwa.read(); - origSrc0_sdwa.read(); - origSrc1.read(); - - DPRINTF(VEGA, "Handling V_ADD_CO_U32 SRC SDWA. 
SRC0: register " - "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, " - "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, " - "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", - extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, - extData.iFmt_VOP_SDWA.DST_U, - extData.iFmt_VOP_SDWA.CLMP, - extData.iFmt_VOP_SDWA.SRC0_SEL, - extData.iFmt_VOP_SDWA.SRC0_SEXT, - extData.iFmt_VOP_SDWA.SRC0_NEG, - extData.iFmt_VOP_SDWA.SRC0_ABS, - extData.iFmt_VOP_SDWA.SRC1_SEL, - extData.iFmt_VOP_SDWA.SRC1_SEXT, - extData.iFmt_VOP_SDWA.SRC1_NEG, - extData.iFmt_VOP_SDWA.SRC1_ABS); - - processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, - src1, origSrc1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0_sdwa[lane] + src1[lane]; - origVdst[lane] = vdst[lane]; // keep copy consistent - vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane] - + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0); - } - } - - processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - vcc.setBit(lane, ((VecElemU64)src0[lane] - + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0); - } - } - } - - vcc.write(); - vdst.write(); - } // execute - // --- Inst_VOP2__V_SUB_CO_U32 class methods --- - - Inst_VOP2__V_SUB_CO_U32::Inst_VOP2__V_SUB_CO_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_sub_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - } // Inst_VOP2__V_SUB_CO_U32 - - Inst_VOP2__V_SUB_CO_U32::~Inst_VOP2__V_SUB_CO_U32() - { - } // ~Inst_VOP2__V_SUB_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u - S1.u; - // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or - // carry-out for V_SUBB_U32. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair. 
- void - Inst_VOP2__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP2__V_SUBREV_CO_U32 class methods --- - - Inst_VOP2__V_SUBREV_CO_U32::Inst_VOP2__V_SUBREV_CO_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subrev_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - } // Inst_VOP2__V_SUBREV_CO_U32 - - Inst_VOP2__V_SUBREV_CO_U32::~Inst_VOP2__V_SUBREV_CO_U32() - { - } // ~Inst_VOP2__V_SUBREV_CO_U32 - - // --- description from .arch file --- - // D.u = S1.u - S0.u; - // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or - // carry-out for V_SUBB_U32. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair. - void - Inst_VOP2__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - vcc.setBit(lane, src0[lane] > src1[lane] ? 
1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP2__V_ADDC_CO_U32 class methods --- - - Inst_VOP2__V_ADDC_CO_U32::Inst_VOP2__V_ADDC_CO_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_addc_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(ReadsVCC); - } // Inst_VOP2__V_ADDC_CO_U32 - - Inst_VOP2__V_ADDC_CO_U32::~Inst_VOP2__V_ADDC_CO_U32() - { - } // ~Inst_VOP2__V_ADDC_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u + VCC[threadId]; - // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x800000000ULL ? 1 : 0) - // is an UNSIGNED overflow. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC - // source comes from the SGPR-pair at S2.u. - void - Inst_VOP2__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - vcc.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane] - + bits(vcc.rawData(), lane); - vcc.setBit(lane, ((VecElemU64)src0[lane] - + (VecElemU64)src1[lane] - + (VecElemU64)bits(vcc.rawData(), lane, lane)) - >= 0x100000000 ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP2__V_SUBB_CO_U32 class methods --- - - Inst_VOP2__V_SUBB_CO_U32::Inst_VOP2__V_SUBB_CO_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subb_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(ReadsVCC); - } // Inst_VOP2__V_SUBB_CO_U32 - - Inst_VOP2__V_SUBB_CO_U32::~Inst_VOP2__V_SUBB_CO_U32() - { - } // ~Inst_VOP2__V_SUBB_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u - S1.u - VCC[threadId]; - // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED - // --- overflow. 
- // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC - // --- source comes from the SGPR-pair at S2.u. - void - Inst_VOP2__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - vcc.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] - = src0[lane] - src1[lane] - bits(vcc.rawData(), lane); - vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) - > src0[lane] ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP2__V_SUBBREV_CO_U32 class methods --- - - Inst_VOP2__V_SUBBREV_CO_U32::Inst_VOP2__V_SUBBREV_CO_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subbrev_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(ReadsVCC); - } // Inst_VOP2__V_SUBBREV_CO_U32 - - Inst_VOP2__V_SUBBREV_CO_U32::~Inst_VOP2__V_SUBBREV_CO_U32() - { - } // ~Inst_VOP2__V_SUBBREV_CO_U32 - - // --- description from .arch file --- - // D.u = S1.u - S0.u - VCC[threadId]; - // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED - // overflow. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC - // source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32. - // SQ translates this to V_SUBREV_U32 with reversed operands. 
- void - Inst_VOP2__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - vcc.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] - = src1[lane] - src0[lane] - bits(vcc.rawData(), lane); - vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane)) - > src1[lane] ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP2__V_ADD_F16 class methods --- - - Inst_VOP2__V_ADD_F16::Inst_VOP2__V_ADD_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_add_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_ADD_F16 - - Inst_VOP2__V_ADD_F16::~Inst_VOP2__V_ADD_F16() - { - } // ~Inst_VOP2__V_ADD_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 + S1.f16. - // Supports denormals, round mode, exception flags, saturation. - void - Inst_VOP2__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_SUB_F16 class methods --- - - Inst_VOP2__V_SUB_F16::Inst_VOP2__V_SUB_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_sub_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_SUB_F16 - - Inst_VOP2__V_SUB_F16::~Inst_VOP2__V_SUB_F16() - { - } // ~Inst_VOP2__V_SUB_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 - S1.f16. - // Supports denormals, round mode, exception flags, saturation. - // SQ translates to V_ADD_F16. 
- void - Inst_VOP2__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_SUBREV_F16 class methods --- - - Inst_VOP2__V_SUBREV_F16::Inst_VOP2__V_SUBREV_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subrev_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_SUBREV_F16 - - Inst_VOP2__V_SUBREV_F16::~Inst_VOP2__V_SUBREV_F16() - { - } // ~Inst_VOP2__V_SUBREV_F16 - - // --- description from .arch file --- - // D.f16 = S1.f16 - S0.f16. - // Supports denormals, round mode, exception flags, saturation. - // SQ translates to V_ADD_F16. - void - Inst_VOP2__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_MUL_F16 class methods --- - - Inst_VOP2__V_MUL_F16::Inst_VOP2__V_MUL_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_MUL_F16 - - Inst_VOP2__V_MUL_F16::~Inst_VOP2__V_MUL_F16() - { - } // ~Inst_VOP2__V_MUL_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * S1.f16. - // Supports denormals, round mode, exception flags, saturation. - void - Inst_VOP2__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_MAC_F16 class methods --- - - Inst_VOP2__V_MAC_F16::Inst_VOP2__V_MAC_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mac_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(MAC); - } // Inst_VOP2__V_MAC_F16 - - Inst_VOP2__V_MAC_F16::~Inst_VOP2__V_MAC_F16() - { - } // ~Inst_VOP2__V_MAC_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * S1.f16 + D.f16. - // Supports round mode, exception flags, saturation. - // SQ translates this to V_MAD_F16. 
- void - Inst_VOP2__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_MADMK_F16 class methods --- - - Inst_VOP2__V_MADMK_F16::Inst_VOP2__V_MADMK_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_madmk_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(MAD); - } // Inst_VOP2__V_MADMK_F16 - - Inst_VOP2__V_MADMK_F16::~Inst_VOP2__V_MADMK_F16() - { - } // ~Inst_VOP2__V_MADMK_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * K.f16 + S1.f16; K is a 16-bit inline constant stored - // in the following literal DWORD. - // This opcode cannot use the VOP3 encoding and cannot use input/output - // modifiers. Supports round mode, exception flags, saturation. - // SQ translates this to V_MAD_F16. - void - Inst_VOP2__V_MADMK_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_MADAK_F16 class methods --- - - Inst_VOP2__V_MADAK_F16::Inst_VOP2__V_MADAK_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_madak_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(MAD); - } // Inst_VOP2__V_MADAK_F16 - - Inst_VOP2__V_MADAK_F16::~Inst_VOP2__V_MADAK_F16() - { - } // ~Inst_VOP2__V_MADAK_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * S1.f16 + K.f16; K is a 16-bit inline constant stored - // in the following literal DWORD. - // This opcode cannot use the VOP3 encoding and cannot use input/output - // modifiers. Supports round mode, exception flags, saturation. - // SQ translates this to V_MAD_F16. - void - Inst_VOP2__V_MADAK_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_ADD_U16 class methods --- - - Inst_VOP2__V_ADD_U16::Inst_VOP2__V_ADD_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_add_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_ADD_U16 - - Inst_VOP2__V_ADD_U16::~Inst_VOP2__V_ADD_U16() - { - } // ~Inst_VOP2__V_ADD_U16 - - // --- description from .arch file --- - // D.u16 = S0.u16 + S1.u16. 
- // Supports saturation (unsigned 16-bit integer domain). - void - Inst_VOP2__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_SUB_U16 class methods --- - - Inst_VOP2__V_SUB_U16::Inst_VOP2__V_SUB_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_sub_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_SUB_U16 - - Inst_VOP2__V_SUB_U16::~Inst_VOP2__V_SUB_U16() - { - } // ~Inst_VOP2__V_SUB_U16 - - // --- description from .arch file --- - // D.u16 = S0.u16 - S1.u16. - // Supports saturation (unsigned 16-bit integer domain). - void - Inst_VOP2__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_SUBREV_U16 class methods --- - - Inst_VOP2__V_SUBREV_U16::Inst_VOP2__V_SUBREV_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subrev_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_SUBREV_U16 - - Inst_VOP2__V_SUBREV_U16::~Inst_VOP2__V_SUBREV_U16() - { - } // ~Inst_VOP2__V_SUBREV_U16 - - // --- description from .arch file --- - // D.u16 = S1.u16 - S0.u16. - // Supports saturation (unsigned 16-bit integer domain). - // SQ translates this to V_SUB_U16 with reversed operands. 
- void - Inst_VOP2__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MUL_LO_U16 class methods --- - - Inst_VOP2__V_MUL_LO_U16::Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_mul_lo_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_MUL_LO_U16 - - Inst_VOP2__V_MUL_LO_U16::~Inst_VOP2__V_MUL_LO_U16() - { - } // ~Inst_VOP2__V_MUL_LO_U16 - - // --- description from .arch file --- - // D.u16 = S0.u16 * S1.u16. - // Supports saturation (unsigned 16-bit integer domain). - void - Inst_VOP2__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] * src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_LSHLREV_B16 class methods --- - - Inst_VOP2__V_LSHLREV_B16::Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_lshlrev_b16") - { - setFlag(ALU); - } // Inst_VOP2__V_LSHLREV_B16 - - Inst_VOP2__V_LSHLREV_B16::~Inst_VOP2__V_LSHLREV_B16() - { - } // ~Inst_VOP2__V_LSHLREV_B16 - - // --- description from .arch file --- - // D.u[15:0] = S1.u[15:0] << S0.u[3:0]. - // SQ translates this to an internal SP opcode. 
- void - Inst_VOP2__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] << bits(src0[lane], 3, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_LSHRREV_B16 class methods --- - - Inst_VOP2__V_LSHRREV_B16::Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_lshrrev_b16") - { - setFlag(ALU); - } // Inst_VOP2__V_LSHRREV_B16 - - Inst_VOP2__V_LSHRREV_B16::~Inst_VOP2__V_LSHRREV_B16() - { - } // ~Inst_VOP2__V_LSHRREV_B16 - - // --- description from .arch file --- - // D.u[15:0] = S1.u[15:0] >> S0.u[3:0]. - // The vacated bits are set to zero. - // SQ translates this to an internal SP opcode. - void - Inst_VOP2__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> src0[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_ASHRREV_I16 class methods --- - - Inst_VOP2__V_ASHRREV_I16::Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_ashrrev_i16") - { - setFlag(ALU); - } // Inst_VOP2__V_ASHRREV_I16 - - Inst_VOP2__V_ASHRREV_I16::~Inst_VOP2__V_ASHRREV_I16() - { - } // ~Inst_VOP2__V_ASHRREV_I16 - - // --- description from .arch file --- - // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0]. - // The vacated bits are set to the sign bit of the input value. - // SQ translates this to an internal SP opcode. 
- void - Inst_VOP2__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> src0[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MAX_F16 class methods --- - - Inst_VOP2__V_MAX_F16::Inst_VOP2__V_MAX_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_MAX_F16 - - Inst_VOP2__V_MAX_F16::~Inst_VOP2__V_MAX_F16() - { - } // ~Inst_VOP2__V_MAX_F16 - - // --- description from .arch file --- - // D.f16 = max(S0.f16, S1.f16). - // IEEE compliant. Supports denormals, round mode, exception flags, - // saturation. - void - Inst_VOP2__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_MIN_F16 class methods --- - - Inst_VOP2__V_MIN_F16::Inst_VOP2__V_MIN_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_MIN_F16 - - Inst_VOP2__V_MIN_F16::~Inst_VOP2__V_MIN_F16() - { - } // ~Inst_VOP2__V_MIN_F16 - - // --- description from .arch file --- - // D.f16 = min(S0.f16, S1.f16). - // IEEE compliant. Supports denormals, round mode, exception flags, - // saturation. - void - Inst_VOP2__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_MAX_U16 class methods --- - - Inst_VOP2__V_MAX_U16::Inst_VOP2__V_MAX_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_MAX_U16 - - Inst_VOP2__V_MAX_U16::~Inst_VOP2__V_MAX_U16() - { - } // ~Inst_VOP2__V_MAX_U16 - - // --- description from .arch file --- - // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]). 
- void - Inst_VOP2__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MAX_I16 class methods --- - - Inst_VOP2__V_MAX_I16::Inst_VOP2__V_MAX_I16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_max_i16") - { - setFlag(ALU); - } // Inst_VOP2__V_MAX_I16 - - Inst_VOP2__V_MAX_I16::~Inst_VOP2__V_MAX_I16() - { - } // ~Inst_VOP2__V_MAX_I16 - - // --- description from .arch file --- - // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]). - void - Inst_VOP2__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MIN_U16 class methods --- - - Inst_VOP2__V_MIN_U16::Inst_VOP2__V_MIN_U16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_u16") - { - setFlag(ALU); - } // Inst_VOP2__V_MIN_U16 - - Inst_VOP2__V_MIN_U16::~Inst_VOP2__V_MIN_U16() - { - } // ~Inst_VOP2__V_MIN_U16 - - // --- description from .arch file --- - // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]). 
- void - Inst_VOP2__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_MIN_I16 class methods --- - - Inst_VOP2__V_MIN_I16::Inst_VOP2__V_MIN_I16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_min_i16") - { - setFlag(ALU); - } // Inst_VOP2__V_MIN_I16 - - Inst_VOP2__V_MIN_I16::~Inst_VOP2__V_MIN_I16() - { - } // ~Inst_VOP2__V_MIN_I16 - - // --- description from .arch file --- - // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]). - void - Inst_VOP2__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_LDEXP_F16 class methods --- - - Inst_VOP2__V_LDEXP_F16::Inst_VOP2__V_LDEXP_F16(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_ldexp_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP2__V_LDEXP_F16 - - Inst_VOP2__V_LDEXP_F16::~Inst_VOP2__V_LDEXP_F16() - { - } // ~Inst_VOP2__V_LDEXP_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * (2 ** S1.i16). 
- void - Inst_VOP2__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP2__V_ADD_U32 class methods --- - - Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_add_u32") - { - setFlag(ALU); - } // Inst_VOP2__V_ADD_U32 - - Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32() - { - } // ~Inst_VOP2__V_ADD_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u; - void - Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - VecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - if (isSDWAInst()) { - VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); - // use copies of original src0, src1, and dest during selecting - VecOperandU32 origSrc0_sdwa(gpuDynInst, - extData.iFmt_VOP_SDWA.SRC0); - VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); - VecOperandU32 origVdst(gpuDynInst, instData.VDST); - - src0_sdwa.read(); - origSrc0_sdwa.read(); - origSrc1.read(); - - DPRINTF(VEGA, "Handling V_ADD_U32 SRC SDWA. 
SRC0: register v[%d], " - "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, " - "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, " - "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", - extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, - extData.iFmt_VOP_SDWA.DST_U, - extData.iFmt_VOP_SDWA.CLMP, - extData.iFmt_VOP_SDWA.SRC0_SEL, - extData.iFmt_VOP_SDWA.SRC0_SEXT, - extData.iFmt_VOP_SDWA.SRC0_NEG, - extData.iFmt_VOP_SDWA.SRC0_ABS, - extData.iFmt_VOP_SDWA.SRC1_SEL, - extData.iFmt_VOP_SDWA.SRC1_SEXT, - extData.iFmt_VOP_SDWA.SRC1_NEG, - extData.iFmt_VOP_SDWA.SRC1_ABS); - - processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, - src1, origSrc1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0_sdwa[lane] + src1[lane]; - origVdst[lane] = vdst[lane]; // keep copy consistent - } - } - - processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_SUB_U32 class methods --- - - Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_sub_u32") - { - setFlag(ALU); - } // Inst_VOP2__V_SUB_U32 - - Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32() - { - } // ~Inst_VOP2__V_SUB_U32 - - // --- description from .arch file --- - // D.u = S0.u - S1.u; - void - Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP2__V_SUBREV_U32 class methods --- - - 
Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt) - : Inst_VOP2(iFmt, "v_subrev_u32") - { - setFlag(ALU); - } // Inst_VOP2__V_SUBREV_U32 - - Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32() - { - } // ~Inst_VOP2__V_SUBREV_U32 - - // --- description from .arch file --- - // D.u = S1.u - S0.u; - void - Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_NOP class methods --- - - Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_nop") - { - setFlag(Nop); - setFlag(ALU); - } // Inst_VOP1__V_NOP - - Inst_VOP1__V_NOP::~Inst_VOP1__V_NOP() - { - } // ~Inst_VOP1__V_NOP - - // --- description from .arch file --- - // Do nothing. - void - Inst_VOP1__V_NOP::execute(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_VOP1__V_MOV_B32 class methods --- - - Inst_VOP1__V_MOV_B32::Inst_VOP1__V_MOV_B32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_mov_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_MOV_B32 - - Inst_VOP1__V_MOV_B32::~Inst_VOP1__V_MOV_B32() - { - } // ~Inst_VOP1__V_MOV_B32 - - // --- description from .arch file --- - // D.u = S0.u. - // Input and output modifiers not supported; this is an untyped operation. - void - Inst_VOP1__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (isDPPInst()) { - VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0); - src_dpp.read(); - - DPRINTF(VEGA, "Handling V_MOV_B32 SRC DPP. 
SRC0: register v[%d], " - "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, " - "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, " - "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0, - extData.iFmt_VOP_DPP.DPP_CTRL, - extData.iFmt_VOP_DPP.SRC0_ABS, - extData.iFmt_VOP_DPP.SRC0_NEG, - extData.iFmt_VOP_DPP.SRC1_ABS, - extData.iFmt_VOP_DPP.SRC1_NEG, - extData.iFmt_VOP_DPP.BC, - extData.iFmt_VOP_DPP.BANK_MASK, - extData.iFmt_VOP_DPP.ROW_MASK); - - // NOTE: For VOP1, there is no SRC1, so make sure we're not trying - // to negate it or take the absolute value of it - assert(!extData.iFmt_VOP_DPP.SRC1_ABS); - assert(!extData.iFmt_VOP_DPP.SRC1_NEG); - processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src_dpp[lane]; - } - } - } else { - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_READFIRSTLANE_B32 class methods --- - - Inst_VOP1__V_READFIRSTLANE_B32::Inst_VOP1__V_READFIRSTLANE_B32( - InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_readfirstlane_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_READFIRSTLANE_B32 - - Inst_VOP1__V_READFIRSTLANE_B32::~Inst_VOP1__V_READFIRSTLANE_B32() - { - } // ~Inst_VOP1__V_READFIRSTLANE_B32 - - // --- description from .arch file --- - // Copy one VGPR value to one SGPR. D = SGPR destination, S0 = source data - // (VGPR# or M0 for lds direct access), Lane# = FindFirst1fromLSB(exec) - // (Lane# = 0 if exec is zero). Ignores exec mask for the access. SQ - // translates to V_READLANE_B32. - // Input and output modifiers not supported; this is an untyped operation. 
    void
    Inst_VOP1__V_READFIRSTLANE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarRegI32 src_lane(0);
        ScalarRegU64 exec_mask = wf->execMask().to_ullong();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // Pick the lowest-numbered active lane; defaults to lane 0 when
        // EXEC is all zeros (per the .arch description above).
        if (exec_mask) {
            src_lane = findLsbSet(exec_mask);
        }

        // Broadcast that lane's value into the scalar destination.
        sdst = src[src_lane];

        sdst.write();
    } // execute
    // --- Inst_VOP1__V_CVT_I32_F64 class methods ---

    Inst_VOP1__V_CVT_I32_F64::Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_i32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_I32_F64

    Inst_VOP1__V_CVT_I32_F64::~Inst_VOP1__V_CVT_I32_F64()
    {
    } // ~Inst_VOP1__V_CVT_I32_F64

    // --- description from .arch file ---
    // D.i = (int)S0.d.
    // Out-of-range floating point values (including infinity) saturate. NaN is
    // --- converted to 0.
    void
    Inst_VOP1__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                // Only the binary exponent is needed; the mantissa return
                // value of frexp() is intentionally discarded.
                std::frexp(src[lane],&exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 30) {
                    // Saturate by sign.
                    // NOTE(review): frexp() gives |x| in [2^(exp-1), 2^exp),
                    // so exp > 30 also saturates values in [2^30, 2^31) that
                    // fit in an int32 — confirm whether exp > 31 was meant.
                    if (std::signbit(src[lane])) {
                        vdst[lane] = INT_MIN;
                    } else {
                        vdst[lane] = INT_MAX;
                    }
                } else {
                    vdst[lane] = (VecElemI32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_CVT_F64_I32 class methods ---

    Inst_VOP1__V_CVT_F64_I32::Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f64_i32")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F64_I32

    Inst_VOP1__V_CVT_F64_I32::~Inst_VOP1__V_CVT_F64_I32()
    {
    } // ~Inst_VOP1__V_CVT_F64_I32

    // --- description from .arch file ---
    // D.d = (double)S0.i.
    void
    Inst_VOP1__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // Exact conversion: every int32 is representable as an F64.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_CVT_F32_I32 class methods ---

    Inst_VOP1__V_CVT_F32_I32::Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_i32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_I32

    Inst_VOP1__V_CVT_F32_I32::~Inst_VOP1__V_CVT_F32_I32()
    {
    } // ~Inst_VOP1__V_CVT_F32_I32

    // --- description from .arch file ---
    // D.f = (float)S0.i.
    void
    Inst_VOP1__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // Per-lane int32 -> float conversion (rounds per the host FPU for
        // magnitudes above 2^24).
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_CVT_F32_U32 class methods ---

    Inst_VOP1__V_CVT_F32_U32::Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_u32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_U32

    Inst_VOP1__V_CVT_F32_U32::~Inst_VOP1__V_CVT_F32_U32()
    {
    } // ~Inst_VOP1__V_CVT_F32_U32

    // --- description from .arch file ---
    // D.f = (float)S0.u.
    void
    Inst_VOP1__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // Per-lane uint32 -> float conversion.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_CVT_U32_F32 class methods ---

    Inst_VOP1__V_CVT_U32_F32::Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_u32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_U32_F32

    Inst_VOP1__V_CVT_U32_F32::~Inst_VOP1__V_CVT_U32_F32()
    {
    } // ~Inst_VOP1__V_CVT_U32_F32

    // --- description from .arch file ---
    // D.u = (unsigned)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN is
    // --- converted to 0.
    void
    Inst_VOP1__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                // Only the binary exponent is needed from frexp().
                std::frexp(src[lane],&exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 31) {
                    // NOTE(review): this clips |x| >= 2^31 even though
                    // [2^31, 2^32) fits a uint32, and it saturates large
                    // *negative* finite values to UINT_MAX rather than 0;
                    // also (VecElemU32)x is UB for negative x — confirm
                    // intended saturation behavior.
                    vdst[lane] = UINT_MAX;
                } else {
                    vdst[lane] = (VecElemU32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_CVT_I32_F32 class methods ---

    Inst_VOP1__V_CVT_I32_F32::Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_I32_F32

    Inst_VOP1__V_CVT_I32_F32::~Inst_VOP1__V_CVT_I32_F32()
    {
    } // ~Inst_VOP1__V_CVT_I32_F32

    // --- description from .arch file ---
    // D.i = (int)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN is
    // --- converted to 0.
    void
    Inst_VOP1__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                // Only the binary exponent is needed from frexp().
                std::frexp(src[lane],&exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 30) {
                    // Saturate by sign (same exponent threshold as the
                    // F64 variant of this conversion above).
                    if (std::signbit(src[lane])) {
                        vdst[lane] = INT_MIN;
                    } else {
                        vdst[lane] = INT_MAX;
                    }
                } else {
                    vdst[lane] = (VecElemI32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_MOV_FED_B32 class methods ---

    Inst_VOP1__V_MOV_FED_B32::Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_mov_fed_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_MOV_FED_B32

    Inst_VOP1__V_MOV_FED_B32::~Inst_VOP1__V_MOV_FED_B32()
    {
    } // ~Inst_VOP1__V_MOV_FED_B32

    // --- description from .arch file ---
    // D.u = S0.u;
    // Introduce EDC double error upon write to dest vgpr without causing an
    // --- exception.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP1__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        // EDC error injection is not modeled by the simulator.
        panicUnimplemented();
    } // execute
    // --- Inst_VOP1__V_CVT_F16_F32 class methods ---

    Inst_VOP1__V_CVT_F16_F32::Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f16_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F16_F32

    Inst_VOP1__V_CVT_F16_F32::~Inst_VOP1__V_CVT_F16_F32()
    {
    } // ~Inst_VOP1__V_CVT_F16_F32

    // --- description from .arch file ---
    // D.f16 = flt32_to_flt16(S0.f).
    // Supports input modifiers and creates FP16 denormals when appropriate.
    void
    Inst_VOP1__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        // FP16 conversions are not implemented.
        panicUnimplemented();
    } // execute
    // --- Inst_VOP1__V_CVT_F32_F16 class methods ---

    Inst_VOP1__V_CVT_F32_F16::Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_f16")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_F16

    Inst_VOP1__V_CVT_F32_F16::~Inst_VOP1__V_CVT_F32_F16()
    {
    } // ~Inst_VOP1__V_CVT_F32_F16

    // --- description from .arch file ---
    // D.f = flt16_to_flt32(S0.f16).
    // FP16 denormal inputs are always accepted.
    void
    Inst_VOP1__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        // FP16 conversions are not implemented.
        panicUnimplemented();
    } // execute
    // --- Inst_VOP1__V_CVT_RPI_I32_F32 class methods ---

    Inst_VOP1__V_CVT_RPI_I32_F32::Inst_VOP1__V_CVT_RPI_I32_F32(
        InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_rpi_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_RPI_I32_F32

    Inst_VOP1__V_CVT_RPI_I32_F32::~Inst_VOP1__V_CVT_RPI_I32_F32()
    {
    } // ~Inst_VOP1__V_CVT_RPI_I32_F32

    // --- description from .arch file ---
    // D.i = (int)floor(S0.f + 0.5).
    void
    Inst_VOP1__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // Round-to-plus-infinity conversion: floor(x + 0.5).
        // NOTE(review): no NaN/saturation handling here, unlike the other
        // float->int conversions above — confirm that is intended.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_CVT_FLR_I32_F32 class methods ---

    Inst_VOP1__V_CVT_FLR_I32_F32::Inst_VOP1__V_CVT_FLR_I32_F32(
        InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_flr_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_FLR_I32_F32

    Inst_VOP1__V_CVT_FLR_I32_F32::~Inst_VOP1__V_CVT_FLR_I32_F32()
    {
    } // ~Inst_VOP1__V_CVT_FLR_I32_F32

    // --- description from .arch file ---
    // D.i = (int)floor(S0.f).
    void
    Inst_VOP1__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // Round-to-minus-infinity conversion.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_CVT_OFF_F32_I4 class methods ---

    Inst_VOP1__V_CVT_OFF_F32_I4::Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_off_f32_i4")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_OFF_F32_I4

    Inst_VOP1__V_CVT_OFF_F32_I4::~Inst_VOP1__V_CVT_OFF_F32_I4()
    {
    } // ~Inst_VOP1__V_CVT_OFF_F32_I4

    // --- description from .arch file ---
    // 4-bit signed int to 32-bit float. Used for interpolation in shader.
    void
    Inst_VOP1__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
    {
        // Could not parse sq_uc.arch desc field
        panicUnimplemented();
    } // execute
    // --- Inst_VOP1__V_CVT_F32_F64 class methods ---

    Inst_VOP1__V_CVT_F32_F64::Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F32_F64

    Inst_VOP1__V_CVT_F32_F64::~Inst_VOP1__V_CVT_F32_F64()
    {
    } // ~Inst_VOP1__V_CVT_F32_F64

    // --- description from .arch file ---
    // D.f = (float)S0.d.
- void - Inst_VOP1__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_F64_F32 class methods --- - - Inst_VOP1__V_CVT_F64_F32::Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f64_f32") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CVT_F64_F32 - - Inst_VOP1__V_CVT_F64_F32::~Inst_VOP1__V_CVT_F64_F32() - { - } // ~Inst_VOP1__V_CVT_F64_F32 - - // --- description from .arch file --- - // D.d = (double)S0.f. - void - Inst_VOP1__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF64)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_F32_UBYTE0 class methods --- - - Inst_VOP1__V_CVT_F32_UBYTE0::Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_ubyte0") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_UBYTE0 - - Inst_VOP1__V_CVT_F32_UBYTE0::~Inst_VOP1__V_CVT_F32_UBYTE0() - { - } // ~Inst_VOP1__V_CVT_F32_UBYTE0 - - // --- description from .arch file --- - // D.f = (float)(S0.u[7:0]). 
- void - Inst_VOP1__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0)); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_F32_UBYTE1 class methods --- - - Inst_VOP1__V_CVT_F32_UBYTE1::Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_ubyte1") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_UBYTE1 - - Inst_VOP1__V_CVT_F32_UBYTE1::~Inst_VOP1__V_CVT_F32_UBYTE1() - { - } // ~Inst_VOP1__V_CVT_F32_UBYTE1 - - // --- description from .arch file --- - // D.f = (float)(S0.u[15:8]). - void - Inst_VOP1__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8)); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_F32_UBYTE2 class methods --- - - Inst_VOP1__V_CVT_F32_UBYTE2::Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_ubyte2") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_UBYTE2 - - Inst_VOP1__V_CVT_F32_UBYTE2::~Inst_VOP1__V_CVT_F32_UBYTE2() - { - } // ~Inst_VOP1__V_CVT_F32_UBYTE2 - - // --- description from .arch file --- - // D.f = (float)(S0.u[23:16]). 
- void - Inst_VOP1__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16)); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_F32_UBYTE3 class methods --- - - Inst_VOP1__V_CVT_F32_UBYTE3::Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_f32_ubyte3") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CVT_F32_UBYTE3 - - Inst_VOP1__V_CVT_F32_UBYTE3::~Inst_VOP1__V_CVT_F32_UBYTE3() - { - } // ~Inst_VOP1__V_CVT_F32_UBYTE3 - - // --- description from .arch file --- - // D.f = (float)(S0.u[31:24]). - void - Inst_VOP1__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24)); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CVT_U32_F64 class methods --- - - Inst_VOP1__V_CVT_U32_F64::Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_cvt_u32_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_CVT_U32_F64 - - Inst_VOP1__V_CVT_U32_F64::~Inst_VOP1__V_CVT_U32_F64() - { - } // ~Inst_VOP1__V_CVT_U32_F64 - - // --- description from .arch file --- - // D.u = (unsigned)S0.d. - // Out-of-range floating point values (including infinity) saturate. NaN is - // --- converted to 0. 
    void
    Inst_VOP1__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                // Only the binary exponent is needed from frexp().
                std::frexp(src[lane],&exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 31) {
                    // NOTE(review): same threshold as the F32 variant; this
                    // clips [2^31, 2^32) which fits a uint32, and large
                    // negative finite values land here too — confirm.
                    vdst[lane] = UINT_MAX;
                } else {
                    vdst[lane] = (VecElemU32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_CVT_F64_U32 class methods ---

    Inst_VOP1__V_CVT_F64_U32::Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f64_u32")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F64_U32

    Inst_VOP1__V_CVT_F64_U32::~Inst_VOP1__V_CVT_F64_U32()
    {
    } // ~Inst_VOP1__V_CVT_F64_U32

    // --- description from .arch file ---
    // D.d = (double)S0.u.
    void
    Inst_VOP1__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // Exact conversion: every uint32 is representable as an F64.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_TRUNC_F64 class methods ---

    Inst_VOP1__V_TRUNC_F64::Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_trunc_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_TRUNC_F64

    Inst_VOP1__V_TRUNC_F64::~Inst_VOP1__V_TRUNC_F64()
    {
    } // ~Inst_VOP1__V_TRUNC_F64

    // --- description from .arch file ---
    // D.d = trunc(S0.d), return integer part of S0.d.
    void
    Inst_VOP1__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // Round toward zero for every active lane.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::trunc(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_CEIL_F64 class methods ---

    Inst_VOP1__V_CEIL_F64::Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ceil_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CEIL_F64

    Inst_VOP1__V_CEIL_F64::~Inst_VOP1__V_CEIL_F64()
    {
    } // ~Inst_VOP1__V_CEIL_F64

    // --- description from .arch file ---
    // D.d = trunc(S0.d);
    // if(S0.d > 0.0 && S0.d != D.d) then D.d += 1.0.
    void
    Inst_VOP1__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // std::ceil implements the trunc-then-bump recipe from the .arch
        // text directly.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_RNDNE_F64 class methods ---

    Inst_VOP1__V_RNDNE_F64::Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rndne_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_RNDNE_F64

    Inst_VOP1__V_RNDNE_F64::~Inst_VOP1__V_RNDNE_F64()
    {
    } // ~Inst_VOP1__V_RNDNE_F64

    // --- description from .arch file ---
    // D.d = round_nearest_even(S0.d).
    void
    Inst_VOP1__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // roundNearestEven() is the ISA helper for ties-to-even rounding.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_FLOOR_F64 class methods ---

    Inst_VOP1__V_FLOOR_F64::Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_floor_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_FLOOR_F64

    Inst_VOP1__V_FLOOR_F64::~Inst_VOP1__V_FLOOR_F64()
    {
    } // ~Inst_VOP1__V_FLOOR_F64

    // --- description from .arch file ---
    // D.d = trunc(S0.d);
    // if(S0.d < 0.0 && S0.d != D.d) then D.d += -1.0.
    void
    Inst_VOP1__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // std::floor implements the trunc-then-bump recipe from the .arch
        // text directly.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_FRACT_F32 class methods ---

    Inst_VOP1__V_FRACT_F32::Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_fract_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FRACT_F32

    Inst_VOP1__V_FRACT_F32::~Inst_VOP1__V_FRACT_F32()
    {
    } // ~Inst_VOP1__V_FRACT_F32

    // --- description from .arch file ---
    // D.f = S0.f - floor(S0.f).
- void - Inst_VOP1__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF32 int_part(0.0); - vdst[lane] = std::modf(src[lane], &int_part); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_TRUNC_F32 class methods --- - - Inst_VOP1__V_TRUNC_F32::Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_trunc_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_TRUNC_F32 - - Inst_VOP1__V_TRUNC_F32::~Inst_VOP1__V_TRUNC_F32() - { - } // ~Inst_VOP1__V_TRUNC_F32 - - // --- description from .arch file --- - // D.f = trunc(S0.f), return integer part of S0.f. - void - Inst_VOP1__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst (gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::trunc(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_CEIL_F32 class methods --- - - Inst_VOP1__V_CEIL_F32::Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_ceil_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_CEIL_F32 - - Inst_VOP1__V_CEIL_F32::~Inst_VOP1__V_CEIL_F32() - { - } // ~Inst_VOP1__V_CEIL_F32 - - // --- description from .arch file --- - // D.f = trunc(S0.f); - // if(S0.f > 0.0 && S0.f != D.f) then D.f += 1.0. 
    void
    Inst_VOP1__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // std::ceil implements the trunc-then-bump recipe from the .arch
        // text directly.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_RNDNE_F32 class methods ---

    Inst_VOP1__V_RNDNE_F32::Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rndne_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RNDNE_F32

    Inst_VOP1__V_RNDNE_F32::~Inst_VOP1__V_RNDNE_F32()
    {
    } // ~Inst_VOP1__V_RNDNE_F32

    // --- description from .arch file ---
    // D.f = round_nearest_even(S0.f).
    void
    Inst_VOP1__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // roundNearestEven() is the ISA helper for ties-to-even rounding.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_FLOOR_F32 class methods ---

    Inst_VOP1__V_FLOOR_F32::Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_floor_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FLOOR_F32

    Inst_VOP1__V_FLOOR_F32::~Inst_VOP1__V_FLOOR_F32()
    {
    } // ~Inst_VOP1__V_FLOOR_F32

    // --- description from .arch file ---
    // D.f = trunc(S0.f);
    // if(S0.f < 0.0 && S0.f != D.f) then D.f += -1.0.
    void
    Inst_VOP1__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // std::floor implements the trunc-then-bump recipe from the .arch
        // text directly.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_EXP_F32 class methods ---

    Inst_VOP1__V_EXP_F32::Inst_VOP1__V_EXP_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_exp_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_EXP_F32

    Inst_VOP1__V_EXP_F32::~Inst_VOP1__V_EXP_F32()
    {
    } // ~Inst_VOP1__V_EXP_F32

    // --- description from .arch file ---
    // D.f = pow(2.0, S0.f).
    void
    Inst_VOP1__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // Base-2 exponential; computed in double precision via std::pow
        // and narrowed on assignment.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::pow(2.0, src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_LOG_F32 class methods ---

    Inst_VOP1__V_LOG_F32::Inst_VOP1__V_LOG_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_log_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_LOG_F32

    Inst_VOP1__V_LOG_F32::~Inst_VOP1__V_LOG_F32()
    {
    } // ~Inst_VOP1__V_LOG_F32

    // --- description from .arch file ---
    // D.f = log2(S0.f). Base 2 logarithm.
    void
    Inst_VOP1__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // Base-2 logarithm per active lane.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::log2(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_RCP_F32 class methods ---

    Inst_VOP1__V_RCP_F32::Inst_VOP1__V_RCP_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RCP_F32

    Inst_VOP1__V_RCP_F32::~Inst_VOP1__V_RCP_F32()
    {
    } // ~Inst_VOP1__V_RCP_F32

    // --- description from .arch file ---
    // D.f = 1.0 / S0.f. Reciprocal with IEEE rules and < 1ulp error.
    void
    Inst_VOP1__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // Plain IEEE division covers the special cases (1/0 -> inf,
        // 1/inf -> 0, NaN propagates).
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / src[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_RCP_IFLAG_F32 class methods ---

    Inst_VOP1__V_RCP_IFLAG_F32::Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_iflag_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RCP_IFLAG_F32

    Inst_VOP1__V_RCP_IFLAG_F32::~Inst_VOP1__V_RCP_IFLAG_F32()
    {
    } // ~Inst_VOP1__V_RCP_IFLAG_F32

    // --- description from .arch file ---
    // D.f = 1.0 / S0.f. Reciprocal intended for integer division, can raise
    // --- integer DIV_BY_ZERO exception but cannot raise floating-point
    // --- exceptions.
- void - Inst_VOP1__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = 1.0 / src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_RSQ_F32 class methods --- - - Inst_VOP1__V_RSQ_F32::Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rsq_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP1__V_RSQ_F32 - - Inst_VOP1__V_RSQ_F32::~Inst_VOP1__V_RSQ_F32() - { - } // ~Inst_VOP1__V_RSQ_F32 - - // --- description from .arch file --- - // D.f = 1.0 / sqrt(S0.f). Reciprocal square root with IEEE rules. - void - Inst_VOP1__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = 1.0 / std::sqrt(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_RCP_F64 class methods --- - - Inst_VOP1__V_RCP_F64::Inst_VOP1__V_RCP_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rcp_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_RCP_F64 - - Inst_VOP1__V_RCP_F64::~Inst_VOP1__V_RCP_F64() - { - } // ~Inst_VOP1__V_RCP_F64 - - // --- description from .arch file --- - // D.d = 1.0 / S0.d. 
- void - Inst_VOP1__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, instData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::fpclassify(src[lane]) == FP_ZERO) { - vdst[lane] = +INFINITY; - } else if (std::isnan(src[lane])) { - vdst[lane] = NAN; - } else if (std::isinf(src[lane])) { - if (std::signbit(src[lane])) { - vdst[lane] = -0.0; - } else { - vdst[lane] = 0.0; - } - } else { - vdst[lane] = 1.0 / src[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_RSQ_F64 class methods --- - - Inst_VOP1__V_RSQ_F64::Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_rsq_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP1__V_RSQ_F64 - - Inst_VOP1__V_RSQ_F64::~Inst_VOP1__V_RSQ_F64() - { - } // ~Inst_VOP1__V_RSQ_F64 - - // --- description from .arch file --- - // D.d = 1.0 / sqrt(S0.d). See V_RSQ_F32. 
// D.d = 1.0 / sqrt(S0.d), with explicit IEEE special-case handling:
// zero -> +inf, NaN -> NaN, +inf -> 0, any negative input -> NaN.
void
Inst_VOP1__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
    VecOperandF64 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            if (std::fpclassify(src[lane]) == FP_ZERO) {
                vdst[lane] = +INFINITY;
            } else if (std::isnan(src[lane])) {
                vdst[lane] = NAN;
            } else if (std::isinf(src[lane])
                       && !std::signbit(src[lane])) {
                vdst[lane] = 0.0;
            } else if (std::signbit(src[lane])) {
                // sqrt of a negative value is undefined
                vdst[lane] = NAN;
            } else {
                vdst[lane] = 1.0 / std::sqrt(src[lane]);
            }
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP1__V_SQRT_F32 class methods ---

Inst_VOP1__V_SQRT_F32::Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_sqrt_f32")
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP1__V_SQRT_F32

Inst_VOP1__V_SQRT_F32::~Inst_VOP1__V_SQRT_F32()
{
} // ~Inst_VOP1__V_SQRT_F32

// --- description from .arch file ---
// D.f = sqrt(S0.f).
void
Inst_VOP1__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
    VecOperandF32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = std::sqrt(src[lane]);
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP1__V_SQRT_F64 class methods ---

Inst_VOP1__V_SQRT_F64::Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_sqrt_f64")
{
    setFlag(ALU);
    setFlag(F64);
} // Inst_VOP1__V_SQRT_F64

Inst_VOP1__V_SQRT_F64::~Inst_VOP1__V_SQRT_F64()
{
} // ~Inst_VOP1__V_SQRT_F64

// --- description from .arch file ---
// D.d = sqrt(S0.d).
// D.d = sqrt(S0.d), per active lane.
void
Inst_VOP1__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
    VecOperandF64 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = std::sqrt(src[lane]);
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP1__V_SIN_F32 class methods ---

Inst_VOP1__V_SIN_F32::Inst_VOP1__V_SIN_F32(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_sin_f32")
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP1__V_SIN_F32

Inst_VOP1__V_SIN_F32::~Inst_VOP1__V_SIN_F32()
{
} // ~Inst_VOP1__V_SIN_F32

// --- description from .arch file ---
// D.f = sin(S0.f * 2 * PI).
// Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
// float 0.0.
void
Inst_VOP1__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
    // PI is read from a dedicated constant register rather than hard-coded.
    ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
    VecOperandF32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();
    pi.read();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // Inputs outside [-256, 256] yield 0.0 per the spec above.
            if (src[lane] < -256.0 || src[lane] > 256.0) {
                vdst[lane] = 0.0;
            } else {
                // Input is in revolutions, hence the 2*PI scaling.
                vdst[lane] = std::sin(src[lane] * 2.0 * pi.rawData());
            }
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP1__V_COS_F32 class methods ---

Inst_VOP1__V_COS_F32::Inst_VOP1__V_COS_F32(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_cos_f32")
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP1__V_COS_F32

Inst_VOP1__V_COS_F32::~Inst_VOP1__V_COS_F32()
{
} // ~Inst_VOP1__V_COS_F32

// --- description from .arch file ---
// D.f = cos(S0.f * 2 * PI).
// Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
// float 1.0.
- void - Inst_VOP1__V_COS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, instData.SRC0); - ConstScalarOperandF32 pi(gpuDynInst, REG_PI); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - pi.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (src[lane] < -256.0 || src[lane] > 256.0) { - vdst[lane] = 0.0; - } else { - vdst[lane] = std::cos(src[lane] * 2.0 * pi.rawData()); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_NOT_B32 class methods --- - - Inst_VOP1__V_NOT_B32::Inst_VOP1__V_NOT_B32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_not_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_NOT_B32 - - Inst_VOP1__V_NOT_B32::~Inst_VOP1__V_NOT_B32() - { - } // ~Inst_VOP1__V_NOT_B32 - - // --- description from .arch file --- - // D.u = ~S0.u. - // Input and output modifiers not supported. - void - Inst_VOP1__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, instData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = ~src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP1__V_BFREV_B32 class methods --- - - Inst_VOP1__V_BFREV_B32::Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *iFmt) - : Inst_VOP1(iFmt, "v_bfrev_b32") - { - setFlag(ALU); - } // Inst_VOP1__V_BFREV_B32 - - Inst_VOP1__V_BFREV_B32::~Inst_VOP1__V_BFREV_B32() - { - } // ~Inst_VOP1__V_BFREV_B32 - - // --- description from .arch file --- - // D.u[31:0] = S0.u[0:31], bitfield reverse. - // Input and output modifiers not supported. 
// D.u[31:0] = S0.u[0:31] -- reverse the bit order of each active lane.
void
Inst_VOP1__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = reverseBits(src[lane]);
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP1__V_FFBH_U32 class methods ---

Inst_VOP1__V_FFBH_U32::Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_ffbh_u32")
{
    setFlag(ALU);
} // Inst_VOP1__V_FFBH_U32

Inst_VOP1__V_FFBH_U32::~Inst_VOP1__V_FFBH_U32()
{
} // ~Inst_VOP1__V_FFBH_U32

// --- description from .arch file ---
// D.u = position of first 1 in S0.u from MSB;
// D.u = 0xffffffff if S0.u == 0.
void
Inst_VOP1__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    // findFirstOneMsb is expected to return 0xffffffff for zero input
    // per the description above.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = findFirstOneMsb(src[lane]);
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP1__V_FFBL_B32 class methods ---

Inst_VOP1__V_FFBL_B32::Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_ffbl_b32")
{
    setFlag(ALU);
} // Inst_VOP1__V_FFBL_B32

Inst_VOP1__V_FFBL_B32::~Inst_VOP1__V_FFBL_B32()
{
} // ~Inst_VOP1__V_FFBL_B32

// --- description from .arch file ---
// D.u = position of first 1 in S0.u from LSB;
// D.u = 0xffffffff if S0.u == 0.
// D.u = bit position of the first 1 in S0.u counting from the LSB
// (0xffffffff when S0.u == 0, per the description above).
void
Inst_VOP1__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = findFirstOne(src[lane]);
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP1__V_FFBH_I32 class methods ---

Inst_VOP1__V_FFBH_I32::Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_ffbh_i32")
{
    setFlag(ALU);
} // Inst_VOP1__V_FFBH_I32

Inst_VOP1__V_FFBH_I32::~Inst_VOP1__V_FFBH_I32()
{
} // ~Inst_VOP1__V_FFBH_I32

// --- description from .arch file ---
// D.u = position of first bit different from sign bit in S0.i from MSB;
// D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
void
Inst_VOP1__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = firstOppositeSignBit(src[lane]);
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP1__V_FREXP_EXP_I32_F64 class methods ---

Inst_VOP1__V_FREXP_EXP_I32_F64::Inst_VOP1__V_FREXP_EXP_I32_F64(
    InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_frexp_exp_i32_f64")
{
    setFlag(ALU);
    setFlag(F64);
} // Inst_VOP1__V_FREXP_EXP_I32_F64

Inst_VOP1__V_FREXP_EXP_I32_F64::~Inst_VOP1__V_FREXP_EXP_I32_F64()
{
} // ~Inst_VOP1__V_FREXP_EXP_I32_F64

// --- description from .arch file ---
// See V_FREXP_EXP_I32_F32.
// D.i = binary exponent of S0.d such that S0.d = significand * 2**exp
// (C library frexp convention); inf/NaN inputs yield 0.
void
Inst_VOP1__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
    VecOperandI32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                vdst[lane] = 0;
            } else {
                VecElemI32 exp = 0;
                // Only the exponent out-param is wanted; the returned
                // mantissa is discarded.
                std::frexp(src[lane], &exp);
                vdst[lane] = exp;
            }
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP1__V_FREXP_MANT_F64 class methods ---

Inst_VOP1__V_FREXP_MANT_F64::Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_frexp_mant_f64")
{
    setFlag(ALU);
    setFlag(F64);
} // Inst_VOP1__V_FREXP_MANT_F64

Inst_VOP1__V_FREXP_MANT_F64::~Inst_VOP1__V_FREXP_MANT_F64()
{
} // ~Inst_VOP1__V_FREXP_MANT_F64

// --- description from .arch file ---
// See V_FREXP_MANT_F32.
void
Inst_VOP1__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
    VecOperandF64 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // inf/NaN pass through unchanged; otherwise return the
            // frexp mantissa (range (-1.0,-0.5] or [0.5,1.0)).
            if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                vdst[lane] = src[lane];
            } else {
                VecElemI32 exp(0);
                vdst[lane] = std::frexp(src[lane], &exp);
            }
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP1__V_FRACT_F64 class methods ---

Inst_VOP1__V_FRACT_F64::Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_fract_f64")
{
    setFlag(ALU);
    setFlag(F64);
} // Inst_VOP1__V_FRACT_F64

Inst_VOP1__V_FRACT_F64::~Inst_VOP1__V_FRACT_F64()
{
} // ~Inst_VOP1__V_FRACT_F64

// --- description from .arch file ---
// See V_FRACT_F32.
// D.d = fractional part of S0.d, extracted with modf (the integer part
// is discarded). NOTE(review): modf's fraction keeps the input's sign,
// whereas the .arch definition of FRACT is S0 - floor(S0); the two differ
// for negative inputs -- confirm intended semantics.
void
Inst_VOP1__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
    VecOperandF64 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            VecElemF64 int_part(0.0);
            vdst[lane] = std::modf(src[lane], &int_part);
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP1__V_FREXP_EXP_I32_F32 class methods ---

Inst_VOP1__V_FREXP_EXP_I32_F32::Inst_VOP1__V_FREXP_EXP_I32_F32(
    InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_frexp_exp_i32_f32")
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP1__V_FREXP_EXP_I32_F32

Inst_VOP1__V_FREXP_EXP_I32_F32::~Inst_VOP1__V_FREXP_EXP_I32_F32()
{
} // ~Inst_VOP1__V_FREXP_EXP_I32_F32

// --- description from .arch file ---
// if(S0.f == INF || S0.f == NAN) then D.i = 0;
// else D.i = TwosComplement(Exponent(S0.f) - 127 + 1).
// Returns exponent of single precision float input, such that S0.f =
// significand * (2 ** exponent). See also FREXP_MANT_F32, which returns
// the significand.
// D.i = binary exponent of S0.f such that S0.f = significand * 2**exp
// (C library frexp convention); inf/NaN inputs yield 0.
void
Inst_VOP1__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
    VecOperandI32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                vdst[lane] = 0;
            } else {
                VecElemI32 exp(0);
                // Only the exponent out-param is wanted; the returned
                // mantissa is discarded.
                std::frexp(src[lane], &exp);
                vdst[lane] = exp;
            }
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP1__V_FREXP_MANT_F32 class methods ---

Inst_VOP1__V_FREXP_MANT_F32::Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_frexp_mant_f32")
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP1__V_FREXP_MANT_F32

Inst_VOP1__V_FREXP_MANT_F32::~Inst_VOP1__V_FREXP_MANT_F32()
{
} // ~Inst_VOP1__V_FREXP_MANT_F32

// --- description from .arch file ---
// if(S0.f == INF || S0.f == NAN) then D.f = S0.f;
// else D.f = Mantissa(S0.f).
// Result range is in (-1.0,-0.5][0.5,1.0) in normal cases. Returns binary
// --- significand of single precision float input, such that S0.f =
// --- significand * (2 ** exponent). See also FREXP_EXP_I32_F32, which
// --- returns integer exponent.
// D.f = frexp mantissa of S0.f (range (-1.0,-0.5] or [0.5,1.0));
// inf/NaN inputs pass through unchanged.
void
Inst_VOP1__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
    VecOperandF32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                vdst[lane] = src[lane];
            } else {
                VecElemI32 exp(0);
                // Exponent out-param is required by frexp but unused here.
                vdst[lane] = std::frexp(src[lane], &exp);
            }
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP1__V_CLREXCP class methods ---

Inst_VOP1__V_CLREXCP::Inst_VOP1__V_CLREXCP(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_clrexcp")
{
    setFlag(ALU);
} // Inst_VOP1__V_CLREXCP

Inst_VOP1__V_CLREXCP::~Inst_VOP1__V_CLREXCP()
{
} // ~Inst_VOP1__V_CLREXCP

// --- description from .arch file ---
// Clear wave's exception state in SIMD (SP).
// Not implemented in this model; decoding to this instruction panics.
void
Inst_VOP1__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_CVT_F16_U16 class methods ---

Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_cvt_f16_u16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_CVT_F16_U16

Inst_VOP1__V_CVT_F16_U16::~Inst_VOP1__V_CVT_F16_U16()
{
} // ~Inst_VOP1__V_CVT_F16_U16

// --- description from .arch file ---
// D.f16 = uint16_to_flt16(S.u16).
// Supports denormals, rounding, exception flags and saturation.
// Not implemented in this model; decoding to this instruction panics.
void
Inst_VOP1__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_CVT_F16_I16 class methods ---

Inst_VOP1__V_CVT_F16_I16::Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_cvt_f16_i16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_CVT_F16_I16

Inst_VOP1__V_CVT_F16_I16::~Inst_VOP1__V_CVT_F16_I16()
{
} // ~Inst_VOP1__V_CVT_F16_I16

// --- description from .arch file ---
// D.f16 = int16_to_flt16(S.i16).
// Supports denormals, rounding, exception flags and saturation.
// All F16 VOP1 instructions below are unimplemented in this model;
// executing any of them panics the simulation.
void
Inst_VOP1__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_CVT_U16_F16 class methods ---

Inst_VOP1__V_CVT_U16_F16::Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_cvt_u16_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_CVT_U16_F16

Inst_VOP1__V_CVT_U16_F16::~Inst_VOP1__V_CVT_U16_F16()
{
} // ~Inst_VOP1__V_CVT_U16_F16

// --- description from .arch file ---
// D.u16 = flt16_to_uint16(S.f16).
// Supports rounding, exception flags and saturation.
void
Inst_VOP1__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_CVT_I16_F16 class methods ---

Inst_VOP1__V_CVT_I16_F16::Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_cvt_i16_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_CVT_I16_F16

Inst_VOP1__V_CVT_I16_F16::~Inst_VOP1__V_CVT_I16_F16()
{
} // ~Inst_VOP1__V_CVT_I16_F16

// --- description from .arch file ---
// D.i16 = flt16_to_int16(S.f16).
// Supports rounding, exception flags and saturation.
void
Inst_VOP1__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_RCP_F16 class methods ---

Inst_VOP1__V_RCP_F16::Inst_VOP1__V_RCP_F16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_rcp_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_RCP_F16

Inst_VOP1__V_RCP_F16::~Inst_VOP1__V_RCP_F16()
{
} // ~Inst_VOP1__V_RCP_F16

// --- description from .arch file ---
// if(S0.f16 == 1.0f)
//     D.f16 = 1.0f;
// else
//     D.f16 = ApproximateRecip(S0.f16).
void
Inst_VOP1__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_SQRT_F16 class methods ---

Inst_VOP1__V_SQRT_F16::Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_sqrt_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_SQRT_F16

Inst_VOP1__V_SQRT_F16::~Inst_VOP1__V_SQRT_F16()
{
} // ~Inst_VOP1__V_SQRT_F16

// --- description from .arch file ---
// if(S0.f16 == 1.0f)
//     D.f16 = 1.0f;
// else
//     D.f16 = ApproximateSqrt(S0.f16).
void
Inst_VOP1__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_RSQ_F16 class methods ---

Inst_VOP1__V_RSQ_F16::Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_rsq_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_RSQ_F16

Inst_VOP1__V_RSQ_F16::~Inst_VOP1__V_RSQ_F16()
{
} // ~Inst_VOP1__V_RSQ_F16

// --- description from .arch file ---
// if(S0.f16 == 1.0f)
//     D.f16 = 1.0f;
// else
//     D.f16 = ApproximateRecipSqrt(S0.f16).
void
Inst_VOP1__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_LOG_F16 class methods ---

Inst_VOP1__V_LOG_F16::Inst_VOP1__V_LOG_F16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_log_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_LOG_F16

Inst_VOP1__V_LOG_F16::~Inst_VOP1__V_LOG_F16()
{
} // ~Inst_VOP1__V_LOG_F16

// --- description from .arch file ---
// if(S0.f16 == 1.0f)
//     D.f16 = 0.0f;
// else
//     D.f16 = ApproximateLog2(S0.f16).
void
Inst_VOP1__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_EXP_F16 class methods ---

Inst_VOP1__V_EXP_F16::Inst_VOP1__V_EXP_F16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_exp_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_EXP_F16

Inst_VOP1__V_EXP_F16::~Inst_VOP1__V_EXP_F16()
{
} // ~Inst_VOP1__V_EXP_F16

// --- description from .arch file ---
// if(S0.f16 == 0.0f)
//     D.f16 = 1.0f;
// else
//     D.f16 = Approximate2ToX(S0.f16).
void
Inst_VOP1__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_FREXP_MANT_F16 class methods ---

Inst_VOP1__V_FREXP_MANT_F16::Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_frexp_mant_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_FREXP_MANT_F16

Inst_VOP1__V_FREXP_MANT_F16::~Inst_VOP1__V_FREXP_MANT_F16()
{
} // ~Inst_VOP1__V_FREXP_MANT_F16

// --- description from .arch file ---
// if(S0.f16 == +-INF || S0.f16 == NAN)
//     D.f16 = S0.f16;
// else
//     D.f16 = mantissa(S0.f16).
// Result range is (-1.0,-0.5][0.5,1.0).
// C math library frexp function.
// Returns binary significand of half precision float input, such that the
// original single float = significand * (2 ** exponent).
void
Inst_VOP1__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_FREXP_EXP_I16_F16 class methods ---

Inst_VOP1__V_FREXP_EXP_I16_F16::Inst_VOP1__V_FREXP_EXP_I16_F16(
    InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_frexp_exp_i16_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_FREXP_EXP_I16_F16

Inst_VOP1__V_FREXP_EXP_I16_F16::~Inst_VOP1__V_FREXP_EXP_I16_F16()
{
} // ~Inst_VOP1__V_FREXP_EXP_I16_F16

// --- description from .arch file ---
// if(S0.f16 == +-INF || S0.f16 == NAN)
//     D.i16 = 0;
// else
//     D.i16 = 2s_complement(exponent(S0.f16) - 15 + 1).
// C math library frexp function.
// Returns exponent of half precision float input, such that the
// original single float = significand * (2 ** exponent).
void
Inst_VOP1__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_FLOOR_F16 class methods ---

Inst_VOP1__V_FLOOR_F16::Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_floor_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_FLOOR_F16

Inst_VOP1__V_FLOOR_F16::~Inst_VOP1__V_FLOOR_F16()
{
} // ~Inst_VOP1__V_FLOOR_F16

// --- description from .arch file ---
// D.f16 = trunc(S0.f16);
// if(S0.f16 < 0.0f && S0.f16 != D.f16) then D.f16 -= 1.0f.
void
Inst_VOP1__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_CEIL_F16 class methods ---

Inst_VOP1__V_CEIL_F16::Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_ceil_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_CEIL_F16

Inst_VOP1__V_CEIL_F16::~Inst_VOP1__V_CEIL_F16()
{
} // ~Inst_VOP1__V_CEIL_F16

// --- description from .arch file ---
// D.f16 = trunc(S0.f16);
// if(S0.f16 > 0.0f && S0.f16 != D.f16) then D.f16 += 1.0f.
void
Inst_VOP1__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_TRUNC_F16 class methods ---

Inst_VOP1__V_TRUNC_F16::Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_trunc_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_TRUNC_F16

Inst_VOP1__V_TRUNC_F16::~Inst_VOP1__V_TRUNC_F16()
{
} // ~Inst_VOP1__V_TRUNC_F16

// --- description from .arch file ---
// D.f16 = trunc(S0.f16).
// Round-to-zero semantics.
void
Inst_VOP1__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_RNDNE_F16 class methods ---

Inst_VOP1__V_RNDNE_F16::Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_rndne_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_RNDNE_F16

Inst_VOP1__V_RNDNE_F16::~Inst_VOP1__V_RNDNE_F16()
{
} // ~Inst_VOP1__V_RNDNE_F16

// --- description from .arch file ---
// D.f16 = FLOOR(S0.f16 + 0.5f);
// if(floor(S0.f16) is even && fract(S0.f16) == 0.5f) then D.f16 -= 1.0f.
// Round-to-nearest-even semantics.
void
Inst_VOP1__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_FRACT_F16 class methods ---

Inst_VOP1__V_FRACT_F16::Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_fract_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_FRACT_F16

Inst_VOP1__V_FRACT_F16::~Inst_VOP1__V_FRACT_F16()
{
} // ~Inst_VOP1__V_FRACT_F16

// --- description from .arch file ---
// D.f16 = S0.f16 + -floor(S0.f16).
void
Inst_VOP1__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_SIN_F16 class methods ---

Inst_VOP1__V_SIN_F16::Inst_VOP1__V_SIN_F16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_sin_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_SIN_F16

Inst_VOP1__V_SIN_F16::~Inst_VOP1__V_SIN_F16()
{
} // ~Inst_VOP1__V_SIN_F16

// --- description from .arch file ---
// D.f16 = sin(S0.f16 * 2 * PI).
void
Inst_VOP1__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_COS_F16 class methods ---

Inst_VOP1__V_COS_F16::Inst_VOP1__V_COS_F16(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_cos_f16")
{
    setFlag(ALU);
    setFlag(F16);
} // Inst_VOP1__V_COS_F16

Inst_VOP1__V_COS_F16::~Inst_VOP1__V_COS_F16()
{
} // ~Inst_VOP1__V_COS_F16

// --- description from .arch file ---
// D.f16 = cos(S0.f16 * 2 * PI).
void
Inst_VOP1__V_COS_F16::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP1__V_EXP_LEGACY_F32 class methods ---

Inst_VOP1__V_EXP_LEGACY_F32::Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_exp_legacy_f32")
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP1__V_EXP_LEGACY_F32

Inst_VOP1__V_EXP_LEGACY_F32::~Inst_VOP1__V_EXP_LEGACY_F32()
{
} // ~Inst_VOP1__V_EXP_LEGACY_F32

// --- description from .arch file ---
// D.f = pow(2.0, S0.f) with legacy semantics.
// D.f = 2**S0.f, per active lane.
// NOTE(review): the "legacy semantics" of the ISA description (special
// handling of certain edge values) are not modeled -- this is plain pow.
void
Inst_VOP1__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
    VecOperandF32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = std::pow(2.0, src[lane]);
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP1__V_LOG_LEGACY_F32 class methods ---

Inst_VOP1__V_LOG_LEGACY_F32::Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *iFmt)
    : Inst_VOP1(iFmt, "v_log_legacy_f32")
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP1__V_LOG_LEGACY_F32

Inst_VOP1__V_LOG_LEGACY_F32::~Inst_VOP1__V_LOG_LEGACY_F32()
{
} // ~Inst_VOP1__V_LOG_LEGACY_F32

// --- description from .arch file ---
// D.f = log2(S0.f). Base 2 logarithm with legacy semantics.
void
Inst_VOP1__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
    VecOperandF32 vdst(gpuDynInst, instData.VDST);

    src.readSrc();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = std::log2(src[lane]);
        }
    }

    vdst.write();
} // execute
// --- Inst_VOPC__V_CMP_CLASS_F32 class methods ---

Inst_VOPC__V_CMP_CLASS_F32::Inst_VOPC__V_CMP_CLASS_F32(InFmt_VOPC *iFmt)
    : Inst_VOPC(iFmt, "v_cmp_class_f32")
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOPC__V_CMP_CLASS_F32

Inst_VOPC__V_CMP_CLASS_F32::~Inst_VOPC__V_CMP_CLASS_F32()
{
} // ~Inst_VOPC__V_CMP_CLASS_F32

// --- description from .arch file ---
// VCC = IEEE numeric class function specified in S1.u, performed on S0.f
// The function reports true if the floating point value is *any* of the
// --- numeric types selected in S1.u according to the following list:
// S1.u[0] -- value is a signaling NaN.
// S1.u[1] -- value is a quiet NaN.
// S1.u[2] -- value is negative infinity.
// S1.u[3] -- value is a negative normal value.
// S1.u[4] -- value is a negative denormal value.
// S1.u[5] -- value is negative zero.
// S1.u[6] -- value is positive zero.
// S1.u[7] -- value is a positive denormal value.
// S1.u[8] -- value is a positive normal value.
// S1.u[9] -- value is positive infinity.
//
// Each active lane sets its VCC bit when S0 belongs to any class whose
// select bit is set in S1; the `continue` after each match skips the
// remaining (mutually exclusive) class tests for that lane.
// NOTE(review): lanes that match no selected class never write their VCC
// bit (no setBit(lane, 0)), and vcc is declared as a *Const* scalar
// operand yet is written via setBit/write -- both look suspicious;
// confirm against the operand API and intended VCC-clearing semantics.
void
Inst_VOPC__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
    ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

    src0.readSrc();
    src1.read();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // Signaling vs. quiet NaN are not distinguished here; either
            // select bit matches any NaN.
            if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                // is NaN
                if (std::isnan(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 2)) {
                // is -infinity
                if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 3)) {
                // is -normal
                if (std::isnormal(src0[lane])
                    && std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 4)) {
                // is -denormal
                if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                    && std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 5)) {
                // is -zero
                if (std::fpclassify(src0[lane]) == FP_ZERO
                    && std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 6)) {
                // is +zero
                if (std::fpclassify(src0[lane]) == FP_ZERO
                    && !std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 7)) {
                // is +denormal
                if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                    && !std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 8)) {
                // is +normal
                if (std::isnormal(src0[lane])
                    && !std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 9)) {
                // is +infinity
                if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
        }
    }

    vcc.write();
} // execute
// --- Inst_VOPC__V_CMPX_CLASS_F32 class methods ---

Inst_VOPC__V_CMPX_CLASS_F32::Inst_VOPC__V_CMPX_CLASS_F32(InFmt_VOPC *iFmt)
    : Inst_VOPC(iFmt, "v_cmpx_class_f32")
{
    setFlag(ALU);
    setFlag(F32);
    // CMPX variants also update the EXEC mask from the comparison result.
    setFlag(WritesEXEC);
} // Inst_VOPC__V_CMPX_CLASS_F32

Inst_VOPC__V_CMPX_CLASS_F32::~Inst_VOPC__V_CMPX_CLASS_F32()
{
} // ~Inst_VOPC__V_CMPX_CLASS_F32

// --- description from .arch file ---
// EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
// S0.f The function reports true if the floating point value is *any* of
// the numeric types selected in S1.u according to the following list:
// S1.u[0] -- value is a signaling NaN.
// S1.u[1] -- value is a quiet NaN.
// S1.u[2] -- value is negative infinity.
// S1.u[3] -- value is a negative normal value.
// S1.u[4] -- value is a negative denormal value.
// S1.u[5] -- value is negative zero.
// S1.u[6] -- value is positive zero.
// S1.u[7] -- value is a positive denormal value.
// S1.u[8] -- value is a positive normal value.
// S1.u[9] -- value is positive infinity.
// Same numeric-class test as V_CMP_CLASS_F32, but the resulting VCC mask
// is additionally copied into the wave's EXEC mask at the end.
// NOTE(review): lanes that match no selected class never write their VCC
// bit (no setBit(lane, 0)), and vcc is declared as a *Const* scalar
// operand yet is written via setBit/write -- confirm against the operand
// API and intended VCC-clearing semantics.
void
Inst_VOPC__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
    ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

    src0.readSrc();
    src1.read();

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // Signaling vs. quiet NaN are not distinguished here.
            if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                // is NaN
                if (std::isnan(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 2)) {
                // is -infinity
                if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 3)) {
                // is -normal
                if (std::isnormal(src0[lane])
                    && std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 4)) {
                // is -denormal
                if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                    && std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 5)) {
                // is -zero
                if (std::fpclassify(src0[lane]) == FP_ZERO
                    && std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 6)) {
                // is +zero
                if (std::fpclassify(src0[lane]) == FP_ZERO
                    && !std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 7)) {
                // is +denormal
                if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                    && !std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 8)) {
                // is +normal
                if (std::isnormal(src0[lane])
                    && !std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
            if (bits(src1[lane], 9)) {
                // is +infinity
                if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) {
                    vcc.setBit(lane, 1);
                    continue;
                }
            }
        }
    }

    vcc.write();
    // CMPX: the comparison result also becomes the new EXEC mask.
    wf->execMask() = vcc.rawData();
} // execute
// --- Inst_VOPC__V_CMP_CLASS_F64 class methods ---

Inst_VOPC__V_CMP_CLASS_F64::Inst_VOPC__V_CMP_CLASS_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_class_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_CLASS_F64 - - Inst_VOPC__V_CMP_CLASS_F64::~Inst_VOPC__V_CMP_CLASS_F64() - { - } // ~Inst_VOPC__V_CMP_CLASS_F64 - - // --- description from .arch file --- - // VCC = IEEE numeric class function specified in S1.u, performed on S0.d - // The function reports true if the floating point value is *any* of the - // --- numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. 
- void - Inst_VOPC__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_CLASS_F64 class methods --- - - Inst_VOPC__V_CMPX_CLASS_F64::Inst_VOPC__V_CMPX_CLASS_F64(InFmt_VOPC 
*iFmt) - : Inst_VOPC(iFmt, "v_cmpx_class_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_CLASS_F64 - - Inst_VOPC__V_CMPX_CLASS_F64::~Inst_VOPC__V_CMPX_CLASS_F64() - { - } // ~Inst_VOPC__V_CMPX_CLASS_F64 - - // --- description from .arch file --- - // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on - // S0.d The function reports true if the floating point value is *any* of - // the numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. - void - Inst_VOPC__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if 
(bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) - && !std::signbit(src0[lane])) { - vcc.setBit(lane, 1); - continue; - } - } - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMP_CLASS_F16 class methods --- - - Inst_VOPC__V_CMP_CLASS_F16::Inst_VOPC__V_CMP_CLASS_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_class_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_CLASS_F16 - - Inst_VOPC__V_CMP_CLASS_F16::~Inst_VOPC__V_CMP_CLASS_F16() - { - } // ~Inst_VOPC__V_CMP_CLASS_F16 - - // --- description from .arch file --- - // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16 - // The function reports true if the floating point value is *any* of the - // --- numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. 
- void - Inst_VOPC__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_CLASS_F16 class methods --- - - Inst_VOPC__V_CMPX_CLASS_F16::Inst_VOPC__V_CMPX_CLASS_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_class_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_CLASS_F16 - - Inst_VOPC__V_CMPX_CLASS_F16::~Inst_VOPC__V_CMPX_CLASS_F16() - { - } // ~Inst_VOPC__V_CMPX_CLASS_F16 - - // --- description from .arch file --- - // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on - // --- S0.f16 - // The function reports true if the floating point value is *any* of the - // --- numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. - void - Inst_VOPC__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_F_F16 class methods --- - - Inst_VOPC__V_CMP_F_F16::Inst_VOPC__V_CMP_F_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_F_F16 - - Inst_VOPC__V_CMP_F_F16::~Inst_VOPC__V_CMP_F_F16() - { - } // ~Inst_VOPC__V_CMP_F_F16 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_LT_F16 class methods --- - - Inst_VOPC__V_CMP_LT_F16::Inst_VOPC__V_CMP_LT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_LT_F16 - - Inst_VOPC__V_CMP_LT_F16::~Inst_VOPC__V_CMP_LT_F16() - { - } // ~Inst_VOPC__V_CMP_LT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_F16 class methods --- - - Inst_VOPC__V_CMP_EQ_F16::Inst_VOPC__V_CMP_EQ_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_EQ_F16 - - Inst_VOPC__V_CMP_EQ_F16::~Inst_VOPC__V_CMP_EQ_F16() - { - } // ~Inst_VOPC__V_CMP_EQ_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_LE_F16 class methods --- - - Inst_VOPC__V_CMP_LE_F16::Inst_VOPC__V_CMP_LE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_LE_F16 - - Inst_VOPC__V_CMP_LE_F16::~Inst_VOPC__V_CMP_LE_F16() - { - } // ~Inst_VOPC__V_CMP_LE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_GT_F16 class methods --- - - Inst_VOPC__V_CMP_GT_F16::Inst_VOPC__V_CMP_GT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_GT_F16 - - Inst_VOPC__V_CMP_GT_F16::~Inst_VOPC__V_CMP_GT_F16() - { - } // ~Inst_VOPC__V_CMP_GT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_LG_F16 class methods --- - - Inst_VOPC__V_CMP_LG_F16::Inst_VOPC__V_CMP_LG_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lg_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_LG_F16 - - Inst_VOPC__V_CMP_LG_F16::~Inst_VOPC__V_CMP_LG_F16() - { - } // ~Inst_VOPC__V_CMP_LG_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_GE_F16 class methods --- - - Inst_VOPC__V_CMP_GE_F16::Inst_VOPC__V_CMP_GE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_GE_F16 - - Inst_VOPC__V_CMP_GE_F16::~Inst_VOPC__V_CMP_GE_F16() - { - } // ~Inst_VOPC__V_CMP_GE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_O_F16 class methods --- - - Inst_VOPC__V_CMP_O_F16::Inst_VOPC__V_CMP_O_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_o_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_O_F16 - - Inst_VOPC__V_CMP_O_F16::~Inst_VOPC__V_CMP_O_F16() - { - } // ~Inst_VOPC__V_CMP_O_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_U_F16 class methods --- - - Inst_VOPC__V_CMP_U_F16::Inst_VOPC__V_CMP_U_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_u_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_U_F16 - - Inst_VOPC__V_CMP_U_F16::~Inst_VOPC__V_CMP_U_F16() - { - } // ~Inst_VOPC__V_CMP_U_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_NGE_F16 class methods --- - - Inst_VOPC__V_CMP_NGE_F16::Inst_VOPC__V_CMP_NGE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nge_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NGE_F16 - - Inst_VOPC__V_CMP_NGE_F16::~Inst_VOPC__V_CMP_NGE_F16() - { - } // ~Inst_VOPC__V_CMP_NGE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_NLG_F16 class methods --- - - Inst_VOPC__V_CMP_NLG_F16::Inst_VOPC__V_CMP_NLG_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlg_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NLG_F16 - - Inst_VOPC__V_CMP_NLG_F16::~Inst_VOPC__V_CMP_NLG_F16() - { - } // ~Inst_VOPC__V_CMP_NLG_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_NGT_F16 class methods --- - - Inst_VOPC__V_CMP_NGT_F16::Inst_VOPC__V_CMP_NGT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ngt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NGT_F16 - - Inst_VOPC__V_CMP_NGT_F16::~Inst_VOPC__V_CMP_NGT_F16() - { - } // ~Inst_VOPC__V_CMP_NGT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_NLE_F16 class methods --- - - Inst_VOPC__V_CMP_NLE_F16::Inst_VOPC__V_CMP_NLE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nle_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NLE_F16 - - Inst_VOPC__V_CMP_NLE_F16::~Inst_VOPC__V_CMP_NLE_F16() - { - } // ~Inst_VOPC__V_CMP_NLE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_NEQ_F16 class methods --- - - Inst_VOPC__V_CMP_NEQ_F16::Inst_VOPC__V_CMP_NEQ_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_neq_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NEQ_F16 - - Inst_VOPC__V_CMP_NEQ_F16::~Inst_VOPC__V_CMP_NEQ_F16() - { - } // ~Inst_VOPC__V_CMP_NEQ_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_NLT_F16 class methods --- - - Inst_VOPC__V_CMP_NLT_F16::Inst_VOPC__V_CMP_NLT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlt_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_NLT_F16 - - Inst_VOPC__V_CMP_NLT_F16::~Inst_VOPC__V_CMP_NLT_F16() - { - } // ~Inst_VOPC__V_CMP_NLT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_TRU_F16 class methods --- - - Inst_VOPC__V_CMP_TRU_F16::Inst_VOPC__V_CMP_TRU_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_tru_f16") - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOPC__V_CMP_TRU_F16 - - Inst_VOPC__V_CMP_TRU_F16::~Inst_VOPC__V_CMP_TRU_F16() - { - } // ~Inst_VOPC__V_CMP_TRU_F16 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_F_F16 class methods --- - - Inst_VOPC__V_CMPX_F_F16::Inst_VOPC__V_CMPX_F_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_F16 - - Inst_VOPC__V_CMPX_F_F16::~Inst_VOPC__V_CMPX_F_F16() - { - } // ~Inst_VOPC__V_CMPX_F_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_F16 class methods --- - - Inst_VOPC__V_CMPX_LT_F16::Inst_VOPC__V_CMPX_LT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_F16 - - Inst_VOPC__V_CMPX_LT_F16::~Inst_VOPC__V_CMPX_LT_F16() - { - } // ~Inst_VOPC__V_CMPX_LT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_F16 class methods --- - - Inst_VOPC__V_CMPX_EQ_F16::Inst_VOPC__V_CMPX_EQ_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_F16 - - Inst_VOPC__V_CMPX_EQ_F16::~Inst_VOPC__V_CMPX_EQ_F16() - { - } // ~Inst_VOPC__V_CMPX_EQ_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_F16 class methods --- - - Inst_VOPC__V_CMPX_LE_F16::Inst_VOPC__V_CMPX_LE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_F16 - - Inst_VOPC__V_CMPX_LE_F16::~Inst_VOPC__V_CMPX_LE_F16() - { - } // ~Inst_VOPC__V_CMPX_LE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_F16 class methods --- - - Inst_VOPC__V_CMPX_GT_F16::Inst_VOPC__V_CMPX_GT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_F16 - - Inst_VOPC__V_CMPX_GT_F16::~Inst_VOPC__V_CMPX_GT_F16() - { - } // ~Inst_VOPC__V_CMPX_GT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_LG_F16 class methods --- - - Inst_VOPC__V_CMPX_LG_F16::Inst_VOPC__V_CMPX_LG_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lg_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LG_F16 - - Inst_VOPC__V_CMPX_LG_F16::~Inst_VOPC__V_CMPX_LG_F16() - { - } // ~Inst_VOPC__V_CMPX_LG_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_F16 class methods --- - - Inst_VOPC__V_CMPX_GE_F16::Inst_VOPC__V_CMPX_GE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_F16 - - Inst_VOPC__V_CMPX_GE_F16::~Inst_VOPC__V_CMPX_GE_F16() - { - } // ~Inst_VOPC__V_CMPX_GE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_O_F16 class methods --- - - Inst_VOPC__V_CMPX_O_F16::Inst_VOPC__V_CMPX_O_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_o_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_O_F16 - - Inst_VOPC__V_CMPX_O_F16::~Inst_VOPC__V_CMPX_O_F16() - { - } // ~Inst_VOPC__V_CMPX_O_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOPC__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_U_F16 class methods --- - - Inst_VOPC__V_CMPX_U_F16::Inst_VOPC__V_CMPX_U_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_u_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_U_F16 - - Inst_VOPC__V_CMPX_U_F16::~Inst_VOPC__V_CMPX_U_F16() - { - } // ~Inst_VOPC__V_CMPX_U_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. 
- void - Inst_VOPC__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_NGE_F16 class methods --- - - Inst_VOPC__V_CMPX_NGE_F16::Inst_VOPC__V_CMPX_NGE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nge_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NGE_F16 - - Inst_VOPC__V_CMPX_NGE_F16::~Inst_VOPC__V_CMPX_NGE_F16() - { - } // ~Inst_VOPC__V_CMPX_NGE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_NLG_F16 class methods --- - - Inst_VOPC__V_CMPX_NLG_F16::Inst_VOPC__V_CMPX_NLG_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlg_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLG_F16 - - Inst_VOPC__V_CMPX_NLG_F16::~Inst_VOPC__V_CMPX_NLG_F16() - { - } // ~Inst_VOPC__V_CMPX_NLG_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_NGT_F16 class methods --- - - Inst_VOPC__V_CMPX_NGT_F16::Inst_VOPC__V_CMPX_NGT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ngt_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NGT_F16 - - Inst_VOPC__V_CMPX_NGT_F16::~Inst_VOPC__V_CMPX_NGT_F16() - { - } // ~Inst_VOPC__V_CMPX_NGT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_NLE_F16 class methods --- - - Inst_VOPC__V_CMPX_NLE_F16::Inst_VOPC__V_CMPX_NLE_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nle_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLE_F16 - - Inst_VOPC__V_CMPX_NLE_F16::~Inst_VOPC__V_CMPX_NLE_F16() - { - } // ~Inst_VOPC__V_CMPX_NLE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_NEQ_F16 class methods --- - - Inst_VOPC__V_CMPX_NEQ_F16::Inst_VOPC__V_CMPX_NEQ_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_neq_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NEQ_F16 - - Inst_VOPC__V_CMPX_NEQ_F16::~Inst_VOPC__V_CMPX_NEQ_F16() - { - } // ~Inst_VOPC__V_CMPX_NEQ_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_NLT_F16 class methods --- - - Inst_VOPC__V_CMPX_NLT_F16::Inst_VOPC__V_CMPX_NLT_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlt_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLT_F16 - - Inst_VOPC__V_CMPX_NLT_F16::~Inst_VOPC__V_CMPX_NLT_F16() - { - } // ~Inst_VOPC__V_CMPX_NLT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMPX_TRU_F16 class methods --- - - Inst_VOPC__V_CMPX_TRU_F16::Inst_VOPC__V_CMPX_TRU_F16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_tru_f16") - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_TRU_F16 - - Inst_VOPC__V_CMPX_TRU_F16::~Inst_VOPC__V_CMPX_TRU_F16() - { - } // ~Inst_VOPC__V_CMPX_TRU_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOPC__V_CMP_F_F32 class methods --- - - Inst_VOPC__V_CMP_F_F32::Inst_VOPC__V_CMP_F_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_F_F32 - - Inst_VOPC__V_CMP_F_F32::~Inst_VOPC__V_CMP_F_F32() - { - } // ~Inst_VOPC__V_CMP_F_F32 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_F32 class methods --- - - Inst_VOPC__V_CMP_LT_F32::Inst_VOPC__V_CMP_LT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_LT_F32 - - Inst_VOPC__V_CMP_LT_F32::~Inst_VOPC__V_CMP_LT_F32() - { - } // ~Inst_VOPC__V_CMP_LT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_F32 class methods --- - - Inst_VOPC__V_CMP_EQ_F32::Inst_VOPC__V_CMP_EQ_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_EQ_F32 - - Inst_VOPC__V_CMP_EQ_F32::~Inst_VOPC__V_CMP_EQ_F32() - { - } // ~Inst_VOPC__V_CMP_EQ_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_F32 class methods --- - - Inst_VOPC__V_CMP_LE_F32::Inst_VOPC__V_CMP_LE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_LE_F32 - - Inst_VOPC__V_CMP_LE_F32::~Inst_VOPC__V_CMP_LE_F32() - { - } // ~Inst_VOPC__V_CMP_LE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_F32 class methods --- - - Inst_VOPC__V_CMP_GT_F32::Inst_VOPC__V_CMP_GT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_GT_F32 - - Inst_VOPC__V_CMP_GT_F32::~Inst_VOPC__V_CMP_GT_F32() - { - } // ~Inst_VOPC__V_CMP_GT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LG_F32 class methods --- - - Inst_VOPC__V_CMP_LG_F32::Inst_VOPC__V_CMP_LG_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lg_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_LG_F32 - - Inst_VOPC__V_CMP_LG_F32::~Inst_VOPC__V_CMP_LG_F32() - { - } // ~Inst_VOPC__V_CMP_LG_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_F32 class methods --- - - Inst_VOPC__V_CMP_GE_F32::Inst_VOPC__V_CMP_GE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_GE_F32 - - Inst_VOPC__V_CMP_GE_F32::~Inst_VOPC__V_CMP_GE_F32() - { - } // ~Inst_VOPC__V_CMP_GE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_O_F32 class methods --- - - Inst_VOPC__V_CMP_O_F32::Inst_VOPC__V_CMP_O_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_o_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_O_F32 - - Inst_VOPC__V_CMP_O_F32::~Inst_VOPC__V_CMP_O_F32() - { - } // ~Inst_VOPC__V_CMP_O_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_U_F32 class methods --- - - Inst_VOPC__V_CMP_U_F32::Inst_VOPC__V_CMP_U_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_u_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_U_F32 - - Inst_VOPC__V_CMP_U_F32::~Inst_VOPC__V_CMP_U_F32() - { - } // ~Inst_VOPC__V_CMP_U_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NGE_F32 class methods --- - - Inst_VOPC__V_CMP_NGE_F32::Inst_VOPC__V_CMP_NGE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nge_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NGE_F32 - - Inst_VOPC__V_CMP_NGE_F32::~Inst_VOPC__V_CMP_NGE_F32() - { - } // ~Inst_VOPC__V_CMP_NGE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NLG_F32 class methods --- - - Inst_VOPC__V_CMP_NLG_F32::Inst_VOPC__V_CMP_NLG_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlg_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NLG_F32 - - Inst_VOPC__V_CMP_NLG_F32::~Inst_VOPC__V_CMP_NLG_F32() - { - } // ~Inst_VOPC__V_CMP_NLG_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NGT_F32 class methods --- - - Inst_VOPC__V_CMP_NGT_F32::Inst_VOPC__V_CMP_NGT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ngt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NGT_F32 - - Inst_VOPC__V_CMP_NGT_F32::~Inst_VOPC__V_CMP_NGT_F32() - { - } // ~Inst_VOPC__V_CMP_NGT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NLE_F32 class methods --- - - Inst_VOPC__V_CMP_NLE_F32::Inst_VOPC__V_CMP_NLE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nle_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NLE_F32 - - Inst_VOPC__V_CMP_NLE_F32::~Inst_VOPC__V_CMP_NLE_F32() - { - } // ~Inst_VOPC__V_CMP_NLE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NEQ_F32 class methods --- - - Inst_VOPC__V_CMP_NEQ_F32::Inst_VOPC__V_CMP_NEQ_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_neq_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NEQ_F32 - - Inst_VOPC__V_CMP_NEQ_F32::~Inst_VOPC__V_CMP_NEQ_F32() - { - } // ~Inst_VOPC__V_CMP_NEQ_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NLT_F32 class methods --- - - Inst_VOPC__V_CMP_NLT_F32::Inst_VOPC__V_CMP_NLT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlt_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_NLT_F32 - - Inst_VOPC__V_CMP_NLT_F32::~Inst_VOPC__V_CMP_NLT_F32() - { - } // ~Inst_VOPC__V_CMP_NLT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_TRU_F32 class methods --- - - Inst_VOPC__V_CMP_TRU_F32::Inst_VOPC__V_CMP_TRU_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_tru_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOPC__V_CMP_TRU_F32 - - Inst_VOPC__V_CMP_TRU_F32::~Inst_VOPC__V_CMP_TRU_F32() - { - } // ~Inst_VOPC__V_CMP_TRU_F32 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_F32 class methods --- - - Inst_VOPC__V_CMPX_F_F32::Inst_VOPC__V_CMPX_F_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_F32 - - Inst_VOPC__V_CMPX_F_F32::~Inst_VOPC__V_CMPX_F_F32() - { - } // ~Inst_VOPC__V_CMPX_F_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_F32 class methods --- - - Inst_VOPC__V_CMPX_LT_F32::Inst_VOPC__V_CMPX_LT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_F32 - - Inst_VOPC__V_CMPX_LT_F32::~Inst_VOPC__V_CMPX_LT_F32() - { - } // ~Inst_VOPC__V_CMPX_LT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_F32 class methods --- - - Inst_VOPC__V_CMPX_EQ_F32::Inst_VOPC__V_CMPX_EQ_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_F32 - - Inst_VOPC__V_CMPX_EQ_F32::~Inst_VOPC__V_CMPX_EQ_F32() - { - } // ~Inst_VOPC__V_CMPX_EQ_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_F32 class methods --- - - Inst_VOPC__V_CMPX_LE_F32::Inst_VOPC__V_CMPX_LE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_F32 - - Inst_VOPC__V_CMPX_LE_F32::~Inst_VOPC__V_CMPX_LE_F32() - { - } // ~Inst_VOPC__V_CMPX_LE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_F32 class methods --- - - Inst_VOPC__V_CMPX_GT_F32::Inst_VOPC__V_CMPX_GT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_F32 - - Inst_VOPC__V_CMPX_GT_F32::~Inst_VOPC__V_CMPX_GT_F32() - { - } // ~Inst_VOPC__V_CMPX_GT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_LG_F32 class methods --- - - Inst_VOPC__V_CMPX_LG_F32::Inst_VOPC__V_CMPX_LG_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lg_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LG_F32 - - Inst_VOPC__V_CMPX_LG_F32::~Inst_VOPC__V_CMPX_LG_F32() - { - } // ~Inst_VOPC__V_CMPX_LG_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_F32 class methods --- - - Inst_VOPC__V_CMPX_GE_F32::Inst_VOPC__V_CMPX_GE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_F32 - - Inst_VOPC__V_CMPX_GE_F32::~Inst_VOPC__V_CMPX_GE_F32() - { - } // ~Inst_VOPC__V_CMPX_GE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_O_F32 class methods --- - - Inst_VOPC__V_CMPX_O_F32::Inst_VOPC__V_CMPX_O_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_o_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_O_F32 - - Inst_VOPC__V_CMPX_O_F32::~Inst_VOPC__V_CMPX_O_F32() - { - } // ~Inst_VOPC__V_CMPX_O_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOPC__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_U_F32 class methods --- - - Inst_VOPC__V_CMPX_U_F32::Inst_VOPC__V_CMPX_U_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_u_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_U_F32 - - Inst_VOPC__V_CMPX_U_F32::~Inst_VOPC__V_CMPX_U_F32() - { - } // ~Inst_VOPC__V_CMPX_U_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. 
- void - Inst_VOPC__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_NGE_F32 class methods --- - - Inst_VOPC__V_CMPX_NGE_F32::Inst_VOPC__V_CMPX_NGE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nge_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NGE_F32 - - Inst_VOPC__V_CMPX_NGE_F32::~Inst_VOPC__V_CMPX_NGE_F32() - { - } // ~Inst_VOPC__V_CMPX_NGE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_NLG_F32 class methods --- - - Inst_VOPC__V_CMPX_NLG_F32::Inst_VOPC__V_CMPX_NLG_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlg_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLG_F32 - - Inst_VOPC__V_CMPX_NLG_F32::~Inst_VOPC__V_CMPX_NLG_F32() - { - } // ~Inst_VOPC__V_CMPX_NLG_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_NGT_F32 class methods --- - - Inst_VOPC__V_CMPX_NGT_F32::Inst_VOPC__V_CMPX_NGT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ngt_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NGT_F32 - - Inst_VOPC__V_CMPX_NGT_F32::~Inst_VOPC__V_CMPX_NGT_F32() - { - } // ~Inst_VOPC__V_CMPX_NGT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_NLE_F32 class methods --- - - Inst_VOPC__V_CMPX_NLE_F32::Inst_VOPC__V_CMPX_NLE_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nle_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLE_F32 - - Inst_VOPC__V_CMPX_NLE_F32::~Inst_VOPC__V_CMPX_NLE_F32() - { - } // ~Inst_VOPC__V_CMPX_NLE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_NEQ_F32 class methods --- - - Inst_VOPC__V_CMPX_NEQ_F32::Inst_VOPC__V_CMPX_NEQ_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_neq_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NEQ_F32 - - Inst_VOPC__V_CMPX_NEQ_F32::~Inst_VOPC__V_CMPX_NEQ_F32() - { - } // ~Inst_VOPC__V_CMPX_NEQ_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] == src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_NLT_F32 class methods --- - - Inst_VOPC__V_CMPX_NLT_F32::Inst_VOPC__V_CMPX_NLT_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlt_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLT_F32 - - Inst_VOPC__V_CMPX_NLT_F32::~Inst_VOPC__V_CMPX_NLT_F32() - { - } // ~Inst_VOPC__V_CMPX_NLT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_TRU_F32 class methods --- - - Inst_VOPC__V_CMPX_TRU_F32::Inst_VOPC__V_CMPX_TRU_F32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_tru_f32") - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_TRU_F32 - - Inst_VOPC__V_CMPX_TRU_F32::~Inst_VOPC__V_CMPX_TRU_F32() - { - } // ~Inst_VOPC__V_CMPX_TRU_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMP_F_F64 class methods --- - - Inst_VOPC__V_CMP_F_F64::Inst_VOPC__V_CMP_F_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_F_F64 - - Inst_VOPC__V_CMP_F_F64::~Inst_VOPC__V_CMP_F_F64() - { - } // ~Inst_VOPC__V_CMP_F_F64 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_F64 class methods --- - - Inst_VOPC__V_CMP_LT_F64::Inst_VOPC__V_CMP_LT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_LT_F64 - - Inst_VOPC__V_CMP_LT_F64::~Inst_VOPC__V_CMP_LT_F64() - { - } // ~Inst_VOPC__V_CMP_LT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_F64 class methods --- - - Inst_VOPC__V_CMP_EQ_F64::Inst_VOPC__V_CMP_EQ_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_EQ_F64 - - Inst_VOPC__V_CMP_EQ_F64::~Inst_VOPC__V_CMP_EQ_F64() - { - } // ~Inst_VOPC__V_CMP_EQ_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_F64 class methods --- - - Inst_VOPC__V_CMP_LE_F64::Inst_VOPC__V_CMP_LE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_LE_F64 - - Inst_VOPC__V_CMP_LE_F64::~Inst_VOPC__V_CMP_LE_F64() - { - } // ~Inst_VOPC__V_CMP_LE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_F64 class methods --- - - Inst_VOPC__V_CMP_GT_F64::Inst_VOPC__V_CMP_GT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_GT_F64 - - Inst_VOPC__V_CMP_GT_F64::~Inst_VOPC__V_CMP_GT_F64() - { - } // ~Inst_VOPC__V_CMP_GT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LG_F64 class methods --- - - Inst_VOPC__V_CMP_LG_F64::Inst_VOPC__V_CMP_LG_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lg_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_LG_F64 - - Inst_VOPC__V_CMP_LG_F64::~Inst_VOPC__V_CMP_LG_F64() - { - } // ~Inst_VOPC__V_CMP_LG_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_F64 class methods --- - - Inst_VOPC__V_CMP_GE_F64::Inst_VOPC__V_CMP_GE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_GE_F64 - - Inst_VOPC__V_CMP_GE_F64::~Inst_VOPC__V_CMP_GE_F64() - { - } // ~Inst_VOPC__V_CMP_GE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_O_F64 class methods --- - - Inst_VOPC__V_CMP_O_F64::Inst_VOPC__V_CMP_O_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_o_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_O_F64 - - Inst_VOPC__V_CMP_O_F64::~Inst_VOPC__V_CMP_O_F64() - { - } // ~Inst_VOPC__V_CMP_O_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_U_F64 class methods --- - - Inst_VOPC__V_CMP_U_F64::Inst_VOPC__V_CMP_U_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_u_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_U_F64 - - Inst_VOPC__V_CMP_U_F64::~Inst_VOPC__V_CMP_U_F64() - { - } // ~Inst_VOPC__V_CMP_U_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NGE_F64 class methods --- - - Inst_VOPC__V_CMP_NGE_F64::Inst_VOPC__V_CMP_NGE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nge_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NGE_F64 - - Inst_VOPC__V_CMP_NGE_F64::~Inst_VOPC__V_CMP_NGE_F64() - { - } // ~Inst_VOPC__V_CMP_NGE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NLG_F64 class methods --- - - Inst_VOPC__V_CMP_NLG_F64::Inst_VOPC__V_CMP_NLG_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlg_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NLG_F64 - - Inst_VOPC__V_CMP_NLG_F64::~Inst_VOPC__V_CMP_NLG_F64() - { - } // ~Inst_VOPC__V_CMP_NLG_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NGT_F64 class methods --- - - Inst_VOPC__V_CMP_NGT_F64::Inst_VOPC__V_CMP_NGT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ngt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NGT_F64 - - Inst_VOPC__V_CMP_NGT_F64::~Inst_VOPC__V_CMP_NGT_F64() - { - } // ~Inst_VOPC__V_CMP_NGT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NLE_F64 class methods --- - - Inst_VOPC__V_CMP_NLE_F64::Inst_VOPC__V_CMP_NLE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nle_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NLE_F64 - - Inst_VOPC__V_CMP_NLE_F64::~Inst_VOPC__V_CMP_NLE_F64() - { - } // ~Inst_VOPC__V_CMP_NLE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NEQ_F64 class methods --- - - Inst_VOPC__V_CMP_NEQ_F64::Inst_VOPC__V_CMP_NEQ_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_neq_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NEQ_F64 - - Inst_VOPC__V_CMP_NEQ_F64::~Inst_VOPC__V_CMP_NEQ_F64() - { - } // ~Inst_VOPC__V_CMP_NEQ_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NLT_F64 class methods --- - - Inst_VOPC__V_CMP_NLT_F64::Inst_VOPC__V_CMP_NLT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_nlt_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_NLT_F64 - - Inst_VOPC__V_CMP_NLT_F64::~Inst_VOPC__V_CMP_NLT_F64() - { - } // ~Inst_VOPC__V_CMP_NLT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_TRU_F64 class methods --- - - Inst_VOPC__V_CMP_TRU_F64::Inst_VOPC__V_CMP_TRU_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_tru_f64") - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOPC__V_CMP_TRU_F64 - - Inst_VOPC__V_CMP_TRU_F64::~Inst_VOPC__V_CMP_TRU_F64() - { - } // ~Inst_VOPC__V_CMP_TRU_F64 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_F64 class methods --- - - Inst_VOPC__V_CMPX_F_F64::Inst_VOPC__V_CMPX_F_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_F64 - - Inst_VOPC__V_CMPX_F_F64::~Inst_VOPC__V_CMPX_F_F64() - { - } // ~Inst_VOPC__V_CMPX_F_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_F64 class methods --- - - Inst_VOPC__V_CMPX_LT_F64::Inst_VOPC__V_CMPX_LT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_F64 - - Inst_VOPC__V_CMPX_LT_F64::~Inst_VOPC__V_CMPX_LT_F64() - { - } // ~Inst_VOPC__V_CMPX_LT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_F64 class methods --- - - Inst_VOPC__V_CMPX_EQ_F64::Inst_VOPC__V_CMPX_EQ_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_F64 - - Inst_VOPC__V_CMPX_EQ_F64::~Inst_VOPC__V_CMPX_EQ_F64() - { - } // ~Inst_VOPC__V_CMPX_EQ_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - wf->execMask() = vcc.rawData(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_F64 class methods --- - - Inst_VOPC__V_CMPX_LE_F64::Inst_VOPC__V_CMPX_LE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_F64 - - Inst_VOPC__V_CMPX_LE_F64::~Inst_VOPC__V_CMPX_LE_F64() - { - } // ~Inst_VOPC__V_CMPX_LE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_F64 class methods --- - - Inst_VOPC__V_CMPX_GT_F64::Inst_VOPC__V_CMPX_GT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_F64 - - Inst_VOPC__V_CMPX_GT_F64::~Inst_VOPC__V_CMPX_GT_F64() - { - } // ~Inst_VOPC__V_CMPX_GT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LG_F64 class methods --- - - Inst_VOPC__V_CMPX_LG_F64::Inst_VOPC__V_CMPX_LG_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lg_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LG_F64 - - Inst_VOPC__V_CMPX_LG_F64::~Inst_VOPC__V_CMPX_LG_F64() - { - } // ~Inst_VOPC__V_CMPX_LG_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_F64 class methods --- - - Inst_VOPC__V_CMPX_GE_F64::Inst_VOPC__V_CMPX_GE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_F64 - - Inst_VOPC__V_CMPX_GE_F64::~Inst_VOPC__V_CMPX_GE_F64() - { - } // ~Inst_VOPC__V_CMPX_GE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_O_F64 class methods --- - - Inst_VOPC__V_CMPX_O_F64::Inst_VOPC__V_CMPX_O_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_o_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_O_F64 - - Inst_VOPC__V_CMPX_O_F64::~Inst_VOPC__V_CMPX_O_F64() - { - } // ~Inst_VOPC__V_CMPX_O_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOPC__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_U_F64 class methods --- - - Inst_VOPC__V_CMPX_U_F64::Inst_VOPC__V_CMPX_U_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_u_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_U_F64 - - Inst_VOPC__V_CMPX_U_F64::~Inst_VOPC__V_CMPX_U_F64() - { - } // ~Inst_VOPC__V_CMPX_U_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. 
- void - Inst_VOPC__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NGE_F64 class methods --- - - Inst_VOPC__V_CMPX_NGE_F64::Inst_VOPC__V_CMPX_NGE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nge_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NGE_F64 - - Inst_VOPC__V_CMPX_NGE_F64::~Inst_VOPC__V_CMPX_NGE_F64() - { - } // ~Inst_VOPC__V_CMPX_NGE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NLG_F64 class methods --- - - Inst_VOPC__V_CMPX_NLG_F64::Inst_VOPC__V_CMPX_NLG_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlg_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLG_F64 - - Inst_VOPC__V_CMPX_NLG_F64::~Inst_VOPC__V_CMPX_NLG_F64() - { - } // ~Inst_VOPC__V_CMPX_NLG_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NGT_F64 class methods --- - - Inst_VOPC__V_CMPX_NGT_F64::Inst_VOPC__V_CMPX_NGT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ngt_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NGT_F64 - - Inst_VOPC__V_CMPX_NGT_F64::~Inst_VOPC__V_CMPX_NGT_F64() - { - } // ~Inst_VOPC__V_CMPX_NGT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NLE_F64 class methods --- - - Inst_VOPC__V_CMPX_NLE_F64::Inst_VOPC__V_CMPX_NLE_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nle_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLE_F64 - - Inst_VOPC__V_CMPX_NLE_F64::~Inst_VOPC__V_CMPX_NLE_F64() - { - } // ~Inst_VOPC__V_CMPX_NLE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NEQ_F64 class methods --- - - Inst_VOPC__V_CMPX_NEQ_F64::Inst_VOPC__V_CMPX_NEQ_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_neq_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NEQ_F64 - - Inst_VOPC__V_CMPX_NEQ_F64::~Inst_VOPC__V_CMPX_NEQ_F64() - { - } // ~Inst_VOPC__V_CMPX_NEQ_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NLT_F64 class methods --- - - Inst_VOPC__V_CMPX_NLT_F64::Inst_VOPC__V_CMPX_NLT_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_nlt_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NLT_F64 - - Inst_VOPC__V_CMPX_NLT_F64::~Inst_VOPC__V_CMPX_NLT_F64() - { - } // ~Inst_VOPC__V_CMPX_NLT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_TRU_F64 class methods --- - - Inst_VOPC__V_CMPX_TRU_F64::Inst_VOPC__V_CMPX_TRU_F64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_tru_f64") - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_TRU_F64 - - Inst_VOPC__V_CMPX_TRU_F64::~Inst_VOPC__V_CMPX_TRU_F64() - { - } // ~Inst_VOPC__V_CMPX_TRU_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_F_I16 class methods --- - - Inst_VOPC__V_CMP_F_I16::Inst_VOPC__V_CMP_F_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_F_I16 - - Inst_VOPC__V_CMP_F_I16::~Inst_VOPC__V_CMP_F_I16() - { - } // ~Inst_VOPC__V_CMP_F_I16 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_I16 class methods --- - - Inst_VOPC__V_CMP_LT_I16::Inst_VOPC__V_CMP_LT_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LT_I16 - - Inst_VOPC__V_CMP_LT_I16::~Inst_VOPC__V_CMP_LT_I16() - { - } // ~Inst_VOPC__V_CMP_LT_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_I16 class methods --- - - Inst_VOPC__V_CMP_EQ_I16::Inst_VOPC__V_CMP_EQ_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_EQ_I16 - - Inst_VOPC__V_CMP_EQ_I16::~Inst_VOPC__V_CMP_EQ_I16() - { - } // ~Inst_VOPC__V_CMP_EQ_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_I16 class methods --- - - Inst_VOPC__V_CMP_LE_I16::Inst_VOPC__V_CMP_LE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LE_I16 - - Inst_VOPC__V_CMP_LE_I16::~Inst_VOPC__V_CMP_LE_I16() - { - } // ~Inst_VOPC__V_CMP_LE_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_I16 class methods --- - - Inst_VOPC__V_CMP_GT_I16::Inst_VOPC__V_CMP_GT_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GT_I16 - - Inst_VOPC__V_CMP_GT_I16::~Inst_VOPC__V_CMP_GT_I16() - { - } // ~Inst_VOPC__V_CMP_GT_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NE_I16 class methods --- - - Inst_VOPC__V_CMP_NE_I16::Inst_VOPC__V_CMP_NE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ne_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_NE_I16 - - Inst_VOPC__V_CMP_NE_I16::~Inst_VOPC__V_CMP_NE_I16() - { - } // ~Inst_VOPC__V_CMP_NE_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_I16 class methods --- - - Inst_VOPC__V_CMP_GE_I16::Inst_VOPC__V_CMP_GE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GE_I16 - - Inst_VOPC__V_CMP_GE_I16::~Inst_VOPC__V_CMP_GE_I16() - { - } // ~Inst_VOPC__V_CMP_GE_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_T_I16 class methods --- - - Inst_VOPC__V_CMP_T_I16::Inst_VOPC__V_CMP_T_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_t_i16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_T_I16 - - Inst_VOPC__V_CMP_T_I16::~Inst_VOPC__V_CMP_T_I16() - { - } // ~Inst_VOPC__V_CMP_T_I16 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_F_U16 class methods --- - - Inst_VOPC__V_CMP_F_U16::Inst_VOPC__V_CMP_F_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_F_U16 - - Inst_VOPC__V_CMP_F_U16::~Inst_VOPC__V_CMP_F_U16() - { - } // ~Inst_VOPC__V_CMP_F_U16 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_U16 class methods --- - - Inst_VOPC__V_CMP_LT_U16::Inst_VOPC__V_CMP_LT_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LT_U16 - - Inst_VOPC__V_CMP_LT_U16::~Inst_VOPC__V_CMP_LT_U16() - { - } // ~Inst_VOPC__V_CMP_LT_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_U16 class methods --- - - Inst_VOPC__V_CMP_EQ_U16::Inst_VOPC__V_CMP_EQ_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_EQ_U16 - - Inst_VOPC__V_CMP_EQ_U16::~Inst_VOPC__V_CMP_EQ_U16() - { - } // ~Inst_VOPC__V_CMP_EQ_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_U16 class methods --- - - Inst_VOPC__V_CMP_LE_U16::Inst_VOPC__V_CMP_LE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LE_U16 - - Inst_VOPC__V_CMP_LE_U16::~Inst_VOPC__V_CMP_LE_U16() - { - } // ~Inst_VOPC__V_CMP_LE_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_U16 class methods --- - - Inst_VOPC__V_CMP_GT_U16::Inst_VOPC__V_CMP_GT_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GT_U16 - - Inst_VOPC__V_CMP_GT_U16::~Inst_VOPC__V_CMP_GT_U16() - { - } // ~Inst_VOPC__V_CMP_GT_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NE_U16 class methods --- - - Inst_VOPC__V_CMP_NE_U16::Inst_VOPC__V_CMP_NE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ne_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_NE_U16 - - Inst_VOPC__V_CMP_NE_U16::~Inst_VOPC__V_CMP_NE_U16() - { - } // ~Inst_VOPC__V_CMP_NE_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_U16 class methods --- - - Inst_VOPC__V_CMP_GE_U16::Inst_VOPC__V_CMP_GE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GE_U16 - - Inst_VOPC__V_CMP_GE_U16::~Inst_VOPC__V_CMP_GE_U16() - { - } // ~Inst_VOPC__V_CMP_GE_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_T_U16 class methods --- - - Inst_VOPC__V_CMP_T_U16::Inst_VOPC__V_CMP_T_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_t_u16") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_T_U16 - - Inst_VOPC__V_CMP_T_U16::~Inst_VOPC__V_CMP_T_U16() - { - } // ~Inst_VOPC__V_CMP_T_U16 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_I16 class methods --- - - Inst_VOPC__V_CMPX_F_I16::Inst_VOPC__V_CMPX_F_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_I16 - - Inst_VOPC__V_CMPX_F_I16::~Inst_VOPC__V_CMPX_F_I16() - { - } // ~Inst_VOPC__V_CMPX_F_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_I16 class methods --- - - Inst_VOPC__V_CMPX_LT_I16::Inst_VOPC__V_CMPX_LT_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_I16 - - Inst_VOPC__V_CMPX_LT_I16::~Inst_VOPC__V_CMPX_LT_I16() - { - } // ~Inst_VOPC__V_CMPX_LT_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_I16 class methods --- - - Inst_VOPC__V_CMPX_EQ_I16::Inst_VOPC__V_CMPX_EQ_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_I16 - - Inst_VOPC__V_CMPX_EQ_I16::~Inst_VOPC__V_CMPX_EQ_I16() - { - } // ~Inst_VOPC__V_CMPX_EQ_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_I16 class methods --- - - Inst_VOPC__V_CMPX_LE_I16::Inst_VOPC__V_CMPX_LE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_I16 - - Inst_VOPC__V_CMPX_LE_I16::~Inst_VOPC__V_CMPX_LE_I16() - { - } // ~Inst_VOPC__V_CMPX_LE_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_I16 class methods --- - - Inst_VOPC__V_CMPX_GT_I16::Inst_VOPC__V_CMPX_GT_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_I16 - - Inst_VOPC__V_CMPX_GT_I16::~Inst_VOPC__V_CMPX_GT_I16() - { - } // ~Inst_VOPC__V_CMPX_GT_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NE_I16 class methods --- - - Inst_VOPC__V_CMPX_NE_I16::Inst_VOPC__V_CMPX_NE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NE_I16 - - Inst_VOPC__V_CMPX_NE_I16::~Inst_VOPC__V_CMPX_NE_I16() - { - } // ~Inst_VOPC__V_CMPX_NE_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_I16 class methods --- - - Inst_VOPC__V_CMPX_GE_I16::Inst_VOPC__V_CMPX_GE_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_I16 - - Inst_VOPC__V_CMPX_GE_I16::~Inst_VOPC__V_CMPX_GE_I16() - { - } // ~Inst_VOPC__V_CMPX_GE_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_T_I16 class methods --- - - Inst_VOPC__V_CMPX_T_I16::Inst_VOPC__V_CMPX_T_I16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_t_i16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_T_I16 - - Inst_VOPC__V_CMPX_T_I16::~Inst_VOPC__V_CMPX_T_I16() - { - } // ~Inst_VOPC__V_CMPX_T_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_U16 class methods --- - - Inst_VOPC__V_CMPX_F_U16::Inst_VOPC__V_CMPX_F_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_U16 - - Inst_VOPC__V_CMPX_F_U16::~Inst_VOPC__V_CMPX_F_U16() - { - } // ~Inst_VOPC__V_CMPX_F_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_U16 class methods --- - - Inst_VOPC__V_CMPX_LT_U16::Inst_VOPC__V_CMPX_LT_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_U16 - - Inst_VOPC__V_CMPX_LT_U16::~Inst_VOPC__V_CMPX_LT_U16() - { - } // ~Inst_VOPC__V_CMPX_LT_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_U16 class methods --- - - Inst_VOPC__V_CMPX_EQ_U16::Inst_VOPC__V_CMPX_EQ_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_U16 - - Inst_VOPC__V_CMPX_EQ_U16::~Inst_VOPC__V_CMPX_EQ_U16() - { - } // ~Inst_VOPC__V_CMPX_EQ_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_U16 class methods --- - - Inst_VOPC__V_CMPX_LE_U16::Inst_VOPC__V_CMPX_LE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_U16 - - Inst_VOPC__V_CMPX_LE_U16::~Inst_VOPC__V_CMPX_LE_U16() - { - } // ~Inst_VOPC__V_CMPX_LE_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_U16 class methods --- - - Inst_VOPC__V_CMPX_GT_U16::Inst_VOPC__V_CMPX_GT_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_U16 - - Inst_VOPC__V_CMPX_GT_U16::~Inst_VOPC__V_CMPX_GT_U16() - { - } // ~Inst_VOPC__V_CMPX_GT_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NE_U16 class methods --- - - Inst_VOPC__V_CMPX_NE_U16::Inst_VOPC__V_CMPX_NE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NE_U16 - - Inst_VOPC__V_CMPX_NE_U16::~Inst_VOPC__V_CMPX_NE_U16() - { - } // ~Inst_VOPC__V_CMPX_NE_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_U16 class methods --- - - Inst_VOPC__V_CMPX_GE_U16::Inst_VOPC__V_CMPX_GE_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_U16 - - Inst_VOPC__V_CMPX_GE_U16::~Inst_VOPC__V_CMPX_GE_U16() - { - } // ~Inst_VOPC__V_CMPX_GE_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_T_U16 class methods --- - - Inst_VOPC__V_CMPX_T_U16::Inst_VOPC__V_CMPX_T_U16(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_t_u16") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_T_U16 - - Inst_VOPC__V_CMPX_T_U16::~Inst_VOPC__V_CMPX_T_U16() - { - } // ~Inst_VOPC__V_CMPX_T_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_F_I32 class methods --- - - Inst_VOPC__V_CMP_F_I32::Inst_VOPC__V_CMP_F_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_F_I32 - - Inst_VOPC__V_CMP_F_I32::~Inst_VOPC__V_CMP_F_I32() - { - } // ~Inst_VOPC__V_CMP_F_I32 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_I32 class methods --- - - Inst_VOPC__V_CMP_LT_I32::Inst_VOPC__V_CMP_LT_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LT_I32 - - Inst_VOPC__V_CMP_LT_I32::~Inst_VOPC__V_CMP_LT_I32() - { - } // ~Inst_VOPC__V_CMP_LT_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_I32 class methods --- - - Inst_VOPC__V_CMP_EQ_I32::Inst_VOPC__V_CMP_EQ_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_EQ_I32 - - Inst_VOPC__V_CMP_EQ_I32::~Inst_VOPC__V_CMP_EQ_I32() - { - } // ~Inst_VOPC__V_CMP_EQ_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_I32 class methods --- - - Inst_VOPC__V_CMP_LE_I32::Inst_VOPC__V_CMP_LE_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LE_I32 - - Inst_VOPC__V_CMP_LE_I32::~Inst_VOPC__V_CMP_LE_I32() - { - } // ~Inst_VOPC__V_CMP_LE_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_I32 class methods --- - - Inst_VOPC__V_CMP_GT_I32::Inst_VOPC__V_CMP_GT_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GT_I32 - - Inst_VOPC__V_CMP_GT_I32::~Inst_VOPC__V_CMP_GT_I32() - { - } // ~Inst_VOPC__V_CMP_GT_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NE_I32 class methods --- - - Inst_VOPC__V_CMP_NE_I32::Inst_VOPC__V_CMP_NE_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ne_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_NE_I32 - - Inst_VOPC__V_CMP_NE_I32::~Inst_VOPC__V_CMP_NE_I32() - { - } // ~Inst_VOPC__V_CMP_NE_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_I32 class methods --- - - Inst_VOPC__V_CMP_GE_I32::Inst_VOPC__V_CMP_GE_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GE_I32 - - Inst_VOPC__V_CMP_GE_I32::~Inst_VOPC__V_CMP_GE_I32() - { - } // ~Inst_VOPC__V_CMP_GE_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_T_I32 class methods --- - - Inst_VOPC__V_CMP_T_I32::Inst_VOPC__V_CMP_T_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_t_i32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_T_I32 - - Inst_VOPC__V_CMP_T_I32::~Inst_VOPC__V_CMP_T_I32() - { - } // ~Inst_VOPC__V_CMP_T_I32 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_F_U32 class methods --- - - Inst_VOPC__V_CMP_F_U32::Inst_VOPC__V_CMP_F_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_F_U32 - - Inst_VOPC__V_CMP_F_U32::~Inst_VOPC__V_CMP_F_U32() - { - } // ~Inst_VOPC__V_CMP_F_U32 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_U32 class methods --- - - Inst_VOPC__V_CMP_LT_U32::Inst_VOPC__V_CMP_LT_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LT_U32 - - Inst_VOPC__V_CMP_LT_U32::~Inst_VOPC__V_CMP_LT_U32() - { - } // ~Inst_VOPC__V_CMP_LT_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_U32 class methods --- - - Inst_VOPC__V_CMP_EQ_U32::Inst_VOPC__V_CMP_EQ_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_EQ_U32 - - Inst_VOPC__V_CMP_EQ_U32::~Inst_VOPC__V_CMP_EQ_U32() - { - } // ~Inst_VOPC__V_CMP_EQ_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_U32 class methods --- - - Inst_VOPC__V_CMP_LE_U32::Inst_VOPC__V_CMP_LE_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LE_U32 - - Inst_VOPC__V_CMP_LE_U32::~Inst_VOPC__V_CMP_LE_U32() - { - } // ~Inst_VOPC__V_CMP_LE_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_U32 class methods --- - - Inst_VOPC__V_CMP_GT_U32::Inst_VOPC__V_CMP_GT_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GT_U32 - - Inst_VOPC__V_CMP_GT_U32::~Inst_VOPC__V_CMP_GT_U32() - { - } // ~Inst_VOPC__V_CMP_GT_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NE_U32 class methods --- - - Inst_VOPC__V_CMP_NE_U32::Inst_VOPC__V_CMP_NE_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ne_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_NE_U32 - - Inst_VOPC__V_CMP_NE_U32::~Inst_VOPC__V_CMP_NE_U32() - { - } // ~Inst_VOPC__V_CMP_NE_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_U32 class methods --- - - Inst_VOPC__V_CMP_GE_U32::Inst_VOPC__V_CMP_GE_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GE_U32 - - Inst_VOPC__V_CMP_GE_U32::~Inst_VOPC__V_CMP_GE_U32() - { - } // ~Inst_VOPC__V_CMP_GE_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_T_U32 class methods --- - - Inst_VOPC__V_CMP_T_U32::Inst_VOPC__V_CMP_T_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_t_u32") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_T_U32 - - Inst_VOPC__V_CMP_T_U32::~Inst_VOPC__V_CMP_T_U32() - { - } // ~Inst_VOPC__V_CMP_T_U32 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_I32 class methods --- - - Inst_VOPC__V_CMPX_F_I32::Inst_VOPC__V_CMPX_F_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_I32 - - Inst_VOPC__V_CMPX_F_I32::~Inst_VOPC__V_CMPX_F_I32() - { - } // ~Inst_VOPC__V_CMPX_F_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_I32 class methods --- - - Inst_VOPC__V_CMPX_LT_I32::Inst_VOPC__V_CMPX_LT_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_I32 - - Inst_VOPC__V_CMPX_LT_I32::~Inst_VOPC__V_CMPX_LT_I32() - { - } // ~Inst_VOPC__V_CMPX_LT_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_I32 class methods --- - - Inst_VOPC__V_CMPX_EQ_I32::Inst_VOPC__V_CMPX_EQ_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_I32 - - Inst_VOPC__V_CMPX_EQ_I32::~Inst_VOPC__V_CMPX_EQ_I32() - { - } // ~Inst_VOPC__V_CMPX_EQ_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_I32 class methods --- - - Inst_VOPC__V_CMPX_LE_I32::Inst_VOPC__V_CMPX_LE_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_I32 - - Inst_VOPC__V_CMPX_LE_I32::~Inst_VOPC__V_CMPX_LE_I32() - { - } // ~Inst_VOPC__V_CMPX_LE_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_I32 class methods --- - - Inst_VOPC__V_CMPX_GT_I32::Inst_VOPC__V_CMPX_GT_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_I32 - - Inst_VOPC__V_CMPX_GT_I32::~Inst_VOPC__V_CMPX_GT_I32() - { - } // ~Inst_VOPC__V_CMPX_GT_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NE_I32 class methods --- - - Inst_VOPC__V_CMPX_NE_I32::Inst_VOPC__V_CMPX_NE_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NE_I32 - - Inst_VOPC__V_CMPX_NE_I32::~Inst_VOPC__V_CMPX_NE_I32() - { - } // ~Inst_VOPC__V_CMPX_NE_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_I32 class methods --- - - Inst_VOPC__V_CMPX_GE_I32::Inst_VOPC__V_CMPX_GE_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_I32 - - Inst_VOPC__V_CMPX_GE_I32::~Inst_VOPC__V_CMPX_GE_I32() - { - } // ~Inst_VOPC__V_CMPX_GE_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_T_I32 class methods --- - - Inst_VOPC__V_CMPX_T_I32::Inst_VOPC__V_CMPX_T_I32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_t_i32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_T_I32 - - Inst_VOPC__V_CMPX_T_I32::~Inst_VOPC__V_CMPX_T_I32() - { - } // ~Inst_VOPC__V_CMPX_T_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_U32 class methods --- - - Inst_VOPC__V_CMPX_F_U32::Inst_VOPC__V_CMPX_F_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_U32 - - Inst_VOPC__V_CMPX_F_U32::~Inst_VOPC__V_CMPX_F_U32() - { - } // ~Inst_VOPC__V_CMPX_F_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_U32 class methods --- - - Inst_VOPC__V_CMPX_LT_U32::Inst_VOPC__V_CMPX_LT_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_U32 - - Inst_VOPC__V_CMPX_LT_U32::~Inst_VOPC__V_CMPX_LT_U32() - { - } // ~Inst_VOPC__V_CMPX_LT_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_U32 class methods --- - - Inst_VOPC__V_CMPX_EQ_U32::Inst_VOPC__V_CMPX_EQ_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_U32 - - Inst_VOPC__V_CMPX_EQ_U32::~Inst_VOPC__V_CMPX_EQ_U32() - { - } // ~Inst_VOPC__V_CMPX_EQ_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_U32 class methods --- - - Inst_VOPC__V_CMPX_LE_U32::Inst_VOPC__V_CMPX_LE_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_U32 - - Inst_VOPC__V_CMPX_LE_U32::~Inst_VOPC__V_CMPX_LE_U32() - { - } // ~Inst_VOPC__V_CMPX_LE_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_U32 class methods --- - - Inst_VOPC__V_CMPX_GT_U32::Inst_VOPC__V_CMPX_GT_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_U32 - - Inst_VOPC__V_CMPX_GT_U32::~Inst_VOPC__V_CMPX_GT_U32() - { - } // ~Inst_VOPC__V_CMPX_GT_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NE_U32 class methods --- - - Inst_VOPC__V_CMPX_NE_U32::Inst_VOPC__V_CMPX_NE_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NE_U32 - - Inst_VOPC__V_CMPX_NE_U32::~Inst_VOPC__V_CMPX_NE_U32() - { - } // ~Inst_VOPC__V_CMPX_NE_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_U32 class methods --- - - Inst_VOPC__V_CMPX_GE_U32::Inst_VOPC__V_CMPX_GE_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_U32 - - Inst_VOPC__V_CMPX_GE_U32::~Inst_VOPC__V_CMPX_GE_U32() - { - } // ~Inst_VOPC__V_CMPX_GE_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_T_U32 class methods --- - - Inst_VOPC__V_CMPX_T_U32::Inst_VOPC__V_CMPX_T_U32(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_t_u32") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_T_U32 - - Inst_VOPC__V_CMPX_T_U32::~Inst_VOPC__V_CMPX_T_U32() - { - } // ~Inst_VOPC__V_CMPX_T_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_F_I64 class methods --- - - Inst_VOPC__V_CMP_F_I64::Inst_VOPC__V_CMP_F_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_F_I64 - - Inst_VOPC__V_CMP_F_I64::~Inst_VOPC__V_CMP_F_I64() - { - } // ~Inst_VOPC__V_CMP_F_I64 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_I64 class methods --- - - Inst_VOPC__V_CMP_LT_I64::Inst_VOPC__V_CMP_LT_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LT_I64 - - Inst_VOPC__V_CMP_LT_I64::~Inst_VOPC__V_CMP_LT_I64() - { - } // ~Inst_VOPC__V_CMP_LT_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_I64 class methods --- - - Inst_VOPC__V_CMP_EQ_I64::Inst_VOPC__V_CMP_EQ_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_EQ_I64 - - Inst_VOPC__V_CMP_EQ_I64::~Inst_VOPC__V_CMP_EQ_I64() - { - } // ~Inst_VOPC__V_CMP_EQ_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_I64 class methods --- - - Inst_VOPC__V_CMP_LE_I64::Inst_VOPC__V_CMP_LE_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LE_I64 - - Inst_VOPC__V_CMP_LE_I64::~Inst_VOPC__V_CMP_LE_I64() - { - } // ~Inst_VOPC__V_CMP_LE_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_I64 class methods --- - - Inst_VOPC__V_CMP_GT_I64::Inst_VOPC__V_CMP_GT_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GT_I64 - - Inst_VOPC__V_CMP_GT_I64::~Inst_VOPC__V_CMP_GT_I64() - { - } // ~Inst_VOPC__V_CMP_GT_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NE_I64 class methods --- - - Inst_VOPC__V_CMP_NE_I64::Inst_VOPC__V_CMP_NE_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ne_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_NE_I64 - - Inst_VOPC__V_CMP_NE_I64::~Inst_VOPC__V_CMP_NE_I64() - { - } // ~Inst_VOPC__V_CMP_NE_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_I64 class methods --- - - Inst_VOPC__V_CMP_GE_I64::Inst_VOPC__V_CMP_GE_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GE_I64 - - Inst_VOPC__V_CMP_GE_I64::~Inst_VOPC__V_CMP_GE_I64() - { - } // ~Inst_VOPC__V_CMP_GE_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_T_I64 class methods --- - - Inst_VOPC__V_CMP_T_I64::Inst_VOPC__V_CMP_T_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_t_i64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_T_I64 - - Inst_VOPC__V_CMP_T_I64::~Inst_VOPC__V_CMP_T_I64() - { - } // ~Inst_VOPC__V_CMP_T_I64 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_F_U64 class methods --- - - Inst_VOPC__V_CMP_F_U64::Inst_VOPC__V_CMP_F_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_f_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_F_U64 - - Inst_VOPC__V_CMP_F_U64::~Inst_VOPC__V_CMP_F_U64() - { - } // ~Inst_VOPC__V_CMP_F_U64 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LT_U64 class methods --- - - Inst_VOPC__V_CMP_LT_U64::Inst_VOPC__V_CMP_LT_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_lt_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LT_U64 - - Inst_VOPC__V_CMP_LT_U64::~Inst_VOPC__V_CMP_LT_U64() - { - } // ~Inst_VOPC__V_CMP_LT_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_EQ_U64 class methods --- - - Inst_VOPC__V_CMP_EQ_U64::Inst_VOPC__V_CMP_EQ_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_eq_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_EQ_U64 - - Inst_VOPC__V_CMP_EQ_U64::~Inst_VOPC__V_CMP_EQ_U64() - { - } // ~Inst_VOPC__V_CMP_EQ_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_LE_U64 class methods --- - - Inst_VOPC__V_CMP_LE_U64::Inst_VOPC__V_CMP_LE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_le_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_LE_U64 - - Inst_VOPC__V_CMP_LE_U64::~Inst_VOPC__V_CMP_LE_U64() - { - } // ~Inst_VOPC__V_CMP_LE_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GT_U64 class methods --- - - Inst_VOPC__V_CMP_GT_U64::Inst_VOPC__V_CMP_GT_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_gt_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GT_U64 - - Inst_VOPC__V_CMP_GT_U64::~Inst_VOPC__V_CMP_GT_U64() - { - } // ~Inst_VOPC__V_CMP_GT_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_NE_U64 class methods --- - - Inst_VOPC__V_CMP_NE_U64::Inst_VOPC__V_CMP_NE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ne_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_NE_U64 - - Inst_VOPC__V_CMP_NE_U64::~Inst_VOPC__V_CMP_NE_U64() - { - } // ~Inst_VOPC__V_CMP_NE_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_GE_U64 class methods --- - - Inst_VOPC__V_CMP_GE_U64::Inst_VOPC__V_CMP_GE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_ge_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_GE_U64 - - Inst_VOPC__V_CMP_GE_U64::~Inst_VOPC__V_CMP_GE_U64() - { - } // ~Inst_VOPC__V_CMP_GE_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMP_T_U64 class methods --- - - Inst_VOPC__V_CMP_T_U64::Inst_VOPC__V_CMP_T_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmp_t_u64") - { - setFlag(ALU); - } // Inst_VOPC__V_CMP_T_U64 - - Inst_VOPC__V_CMP_T_U64::~Inst_VOPC__V_CMP_T_U64() - { - } // ~Inst_VOPC__V_CMP_T_U64 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_I64 class methods --- - - Inst_VOPC__V_CMPX_F_I64::Inst_VOPC__V_CMPX_F_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_I64 - - Inst_VOPC__V_CMPX_F_I64::~Inst_VOPC__V_CMPX_F_I64() - { - } // ~Inst_VOPC__V_CMPX_F_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_I64 class methods --- - - Inst_VOPC__V_CMPX_LT_I64::Inst_VOPC__V_CMPX_LT_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_I64 - - Inst_VOPC__V_CMPX_LT_I64::~Inst_VOPC__V_CMPX_LT_I64() - { - } // ~Inst_VOPC__V_CMPX_LT_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_I64 class methods --- - - Inst_VOPC__V_CMPX_EQ_I64::Inst_VOPC__V_CMPX_EQ_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_I64 - - Inst_VOPC__V_CMPX_EQ_I64::~Inst_VOPC__V_CMPX_EQ_I64() - { - } // ~Inst_VOPC__V_CMPX_EQ_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_I64 class methods --- - - Inst_VOPC__V_CMPX_LE_I64::Inst_VOPC__V_CMPX_LE_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_I64 - - Inst_VOPC__V_CMPX_LE_I64::~Inst_VOPC__V_CMPX_LE_I64() - { - } // ~Inst_VOPC__V_CMPX_LE_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_I64 class methods --- - - Inst_VOPC__V_CMPX_GT_I64::Inst_VOPC__V_CMPX_GT_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_I64 - - Inst_VOPC__V_CMPX_GT_I64::~Inst_VOPC__V_CMPX_GT_I64() - { - } // ~Inst_VOPC__V_CMPX_GT_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NE_I64 class methods --- - - Inst_VOPC__V_CMPX_NE_I64::Inst_VOPC__V_CMPX_NE_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NE_I64 - - Inst_VOPC__V_CMPX_NE_I64::~Inst_VOPC__V_CMPX_NE_I64() - { - } // ~Inst_VOPC__V_CMPX_NE_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_I64 class methods --- - - Inst_VOPC__V_CMPX_GE_I64::Inst_VOPC__V_CMPX_GE_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_I64 - - Inst_VOPC__V_CMPX_GE_I64::~Inst_VOPC__V_CMPX_GE_I64() - { - } // ~Inst_VOPC__V_CMPX_GE_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_T_I64 class methods --- - - Inst_VOPC__V_CMPX_T_I64::Inst_VOPC__V_CMPX_T_I64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_t_i64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_T_I64 - - Inst_VOPC__V_CMPX_T_I64::~Inst_VOPC__V_CMPX_T_I64() - { - } // ~Inst_VOPC__V_CMPX_T_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_F_U64 class methods --- - - Inst_VOPC__V_CMPX_F_U64::Inst_VOPC__V_CMPX_F_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_f_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_F_U64 - - Inst_VOPC__V_CMPX_F_U64::~Inst_VOPC__V_CMPX_F_U64() - { - } // ~Inst_VOPC__V_CMPX_F_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LT_U64 class methods --- - - Inst_VOPC__V_CMPX_LT_U64::Inst_VOPC__V_CMPX_LT_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_lt_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LT_U64 - - Inst_VOPC__V_CMPX_LT_U64::~Inst_VOPC__V_CMPX_LT_U64() - { - } // ~Inst_VOPC__V_CMPX_LT_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_EQ_U64 class methods --- - - Inst_VOPC__V_CMPX_EQ_U64::Inst_VOPC__V_CMPX_EQ_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_eq_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_EQ_U64 - - Inst_VOPC__V_CMPX_EQ_U64::~Inst_VOPC__V_CMPX_EQ_U64() - { - } // ~Inst_VOPC__V_CMPX_EQ_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_LE_U64 class methods --- - - Inst_VOPC__V_CMPX_LE_U64::Inst_VOPC__V_CMPX_LE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_le_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_LE_U64 - - Inst_VOPC__V_CMPX_LE_U64::~Inst_VOPC__V_CMPX_LE_U64() - { - } // ~Inst_VOPC__V_CMPX_LE_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GT_U64 class methods --- - - Inst_VOPC__V_CMPX_GT_U64::Inst_VOPC__V_CMPX_GT_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_gt_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GT_U64 - - Inst_VOPC__V_CMPX_GT_U64::~Inst_VOPC__V_CMPX_GT_U64() - { - } // ~Inst_VOPC__V_CMPX_GT_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_NE_U64 class methods --- - - Inst_VOPC__V_CMPX_NE_U64::Inst_VOPC__V_CMPX_NE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ne_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_NE_U64 - - Inst_VOPC__V_CMPX_NE_U64::~Inst_VOPC__V_CMPX_NE_U64() - { - } // ~Inst_VOPC__V_CMPX_NE_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_GE_U64 class methods --- - - Inst_VOPC__V_CMPX_GE_U64::Inst_VOPC__V_CMPX_GE_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_ge_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_GE_U64 - - Inst_VOPC__V_CMPX_GE_U64::~Inst_VOPC__V_CMPX_GE_U64() - { - } // ~Inst_VOPC__V_CMPX_GE_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOPC__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - src0.readSrc(); - src1.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VOPC__V_CMPX_T_U64 class methods --- - - Inst_VOPC__V_CMPX_T_U64::Inst_VOPC__V_CMPX_T_U64(InFmt_VOPC *iFmt) - : Inst_VOPC(iFmt, "v_cmpx_t_u64") - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOPC__V_CMPX_T_U64 - - Inst_VOPC__V_CMPX_T_U64::~Inst_VOPC__V_CMPX_T_U64() - { - } // ~Inst_VOPC__V_CMPX_T_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOPC__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, 1); - } - } - - wf->execMask() = vcc.rawData(); - vcc.write(); - } // execute - // --- Inst_VINTRP__V_INTERP_P1_F32 class methods --- - - Inst_VINTRP__V_INTERP_P1_F32::Inst_VINTRP__V_INTERP_P1_F32( - InFmt_VINTRP *iFmt) - : Inst_VINTRP(iFmt, "v_interp_p1_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VINTRP__V_INTERP_P1_F32 - - Inst_VINTRP__V_INTERP_P1_F32::~Inst_VINTRP__V_INTERP_P1_F32() - { - } // ~Inst_VINTRP__V_INTERP_P1_F32 - - // --- description from .arch file --- - // D.f = P10 * S.f + P0; parameter interpolation (SQ translates to - // V_MAD_F32 for SP). - // CAUTION: when in HALF_LDS mode, D must not be the same GPR as S; - // if D == S then data corruption will occur. - // NOTE: In textual representations the I/J VGPR is the first source and - // the attribute is the second source; however in the VOP3 encoding the - // attribute is stored in the src0 field and the VGPR is stored in the - // src1 field. - void - Inst_VINTRP__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VINTRP__V_INTERP_P2_F32 class methods --- - - Inst_VINTRP__V_INTERP_P2_F32::Inst_VINTRP__V_INTERP_P2_F32( - InFmt_VINTRP *iFmt) - : Inst_VINTRP(iFmt, "v_interp_p2_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VINTRP__V_INTERP_P2_F32 - - Inst_VINTRP__V_INTERP_P2_F32::~Inst_VINTRP__V_INTERP_P2_F32() - { - } // ~Inst_VINTRP__V_INTERP_P2_F32 - - // --- description from .arch file --- - // D.f = P20 * S.f + D.f; parameter interpolation (SQ translates to - // V_MAD_F32 for SP). 
- // NOTE: In textual representations the I/J VGPR is the first source and - // the attribute is the second source; however in the VOP3 encoding the - // attribute is stored in the src0 field and the VGPR is stored in the - // src1 field. - void - Inst_VINTRP__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VINTRP__V_INTERP_MOV_F32 class methods --- - - Inst_VINTRP__V_INTERP_MOV_F32::Inst_VINTRP__V_INTERP_MOV_F32( - InFmt_VINTRP *iFmt) - : Inst_VINTRP(iFmt, "v_interp_mov_f32") - { - setFlag(ALU); - setFlag(F32); - } // Inst_VINTRP__V_INTERP_MOV_F32 - - Inst_VINTRP__V_INTERP_MOV_F32::~Inst_VINTRP__V_INTERP_MOV_F32() - { - } // ~Inst_VINTRP__V_INTERP_MOV_F32 - - // --- description from .arch file --- - // D.f = {P10,P20,P0}[S.u]; parameter load. - void - Inst_VINTRP__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_CLASS_F32 class methods --- - - Inst_VOP3__V_CMP_CLASS_F32::Inst_VOP3__V_CMP_CLASS_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_class_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_CLASS_F32 - - Inst_VOP3__V_CMP_CLASS_F32::~Inst_VOP3__V_CMP_CLASS_F32() - { - } // ~Inst_VOP3__V_CMP_CLASS_F32 - - // --- description from .arch file --- - // VCC = IEEE numeric class function specified in S1.u, performed on S0.f - // The function reports true if the floating point value is *any* of the - // --- numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. 
- // S1.u[9] -- value is positive infinity. - void - Inst_VOP3__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_CLASS_F32 class methods --- - - 
Inst_VOP3__V_CMPX_CLASS_F32::Inst_VOP3__V_CMPX_CLASS_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_class_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_CLASS_F32 - - Inst_VOP3__V_CMPX_CLASS_F32::~Inst_VOP3__V_CMPX_CLASS_F32() - { - } // ~Inst_VOP3__V_CMPX_CLASS_F32 - - // --- description from .arch file --- - // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on - // S0.f - // The function reports true if the floating point value is *any* of the - // numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. 
- void - Inst_VOP3__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_CLASS_F64 class methods --- - - 
Inst_VOP3__V_CMP_CLASS_F64::Inst_VOP3__V_CMP_CLASS_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_class_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_CLASS_F64 - - Inst_VOP3__V_CMP_CLASS_F64::~Inst_VOP3__V_CMP_CLASS_F64() - { - } // ~Inst_VOP3__V_CMP_CLASS_F64 - - // --- description from .arch file --- - // VCC = IEEE numeric class function specified in S1.u, performed on S0.d - // The function reports true if the floating point value is *any* of the - // --- numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. 
- void - Inst_VOP3__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_CLASS_F64 class methods --- - - Inst_VOP3__V_CMPX_CLASS_F64::Inst_VOP3__V_CMPX_CLASS_F64( - 
InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_class_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_CLASS_F64 - - Inst_VOP3__V_CMPX_CLASS_F64::~Inst_VOP3__V_CMPX_CLASS_F64() - { - } // ~Inst_VOP3__V_CMPX_CLASS_F64 - - // --- description from .arch file --- - // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on - // S0.d - // The function reports true if the floating point value is *any* of the - // numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. - void - Inst_VOP3__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (bits(src1[lane], 0) || bits(src1[lane], 1)) { - // is NaN - if (std::isnan(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 2)) { - // is -infinity - if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 3)) { - // is -normal - if (std::isnormal(src0[lane]) - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 4)) { - // is -denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - 
continue; - } - } - if (bits(src1[lane], 5)) { - // is -zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 6)) { - // is +zero - if (std::fpclassify(src0[lane]) == FP_ZERO - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 7)) { - // is +denormal - if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 8)) { - // is +normal - if (std::isnormal(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - if (bits(src1[lane], 9)) { - // is +infinity - if (std::isinf(src0[lane]) - && !std::signbit(src0[lane])) { - sdst.setBit(lane, 1); - continue; - } - } - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_CLASS_F16 class methods --- - - Inst_VOP3__V_CMP_CLASS_F16::Inst_VOP3__V_CMP_CLASS_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_class_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_CLASS_F16 - - Inst_VOP3__V_CMP_CLASS_F16::~Inst_VOP3__V_CMP_CLASS_F16() - { - } // ~Inst_VOP3__V_CMP_CLASS_F16 - - // --- description from .arch file --- - // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16 - // The function reports true if the floating point value is *any* of the - // --- numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. 
- void - Inst_VOP3__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_CLASS_F16 class methods --- - - Inst_VOP3__V_CMPX_CLASS_F16::Inst_VOP3__V_CMPX_CLASS_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_class_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_CLASS_F16 - - Inst_VOP3__V_CMPX_CLASS_F16::~Inst_VOP3__V_CMPX_CLASS_F16() - { - } // ~Inst_VOP3__V_CMPX_CLASS_F16 - - // --- description from .arch file --- - // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on - // --- S0.f16 - // The function reports true if the floating point value is *any* of the - // --- numeric types selected in S1.u according to the following list: - // S1.u[0] -- value is a signaling NaN. - // S1.u[1] -- value is a quiet NaN. - // S1.u[2] -- value is negative infinity. - // S1.u[3] -- value is a negative normal value. - // S1.u[4] -- value is a negative denormal value. - // S1.u[5] -- value is negative zero. - // S1.u[6] -- value is positive zero. - // S1.u[7] -- value is a positive denormal value. - // S1.u[8] -- value is a positive normal value. - // S1.u[9] -- value is positive infinity. - void - Inst_VOP3__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_F_F16 class methods --- - - Inst_VOP3__V_CMP_F_F16::Inst_VOP3__V_CMP_F_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_F_F16 - - Inst_VOP3__V_CMP_F_F16::~Inst_VOP3__V_CMP_F_F16() - { - } // ~Inst_VOP3__V_CMP_F_F16 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_LT_F16 class methods --- - - Inst_VOP3__V_CMP_LT_F16::Inst_VOP3__V_CMP_LT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_LT_F16 - - Inst_VOP3__V_CMP_LT_F16::~Inst_VOP3__V_CMP_LT_F16() - { - } // ~Inst_VOP3__V_CMP_LT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_F16 class methods --- - - Inst_VOP3__V_CMP_EQ_F16::Inst_VOP3__V_CMP_EQ_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_EQ_F16 - - Inst_VOP3__V_CMP_EQ_F16::~Inst_VOP3__V_CMP_EQ_F16() - { - } // ~Inst_VOP3__V_CMP_EQ_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_LE_F16 class methods --- - - Inst_VOP3__V_CMP_LE_F16::Inst_VOP3__V_CMP_LE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_LE_F16 - - Inst_VOP3__V_CMP_LE_F16::~Inst_VOP3__V_CMP_LE_F16() - { - } // ~Inst_VOP3__V_CMP_LE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_GT_F16 class methods --- - - Inst_VOP3__V_CMP_GT_F16::Inst_VOP3__V_CMP_GT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_GT_F16 - - Inst_VOP3__V_CMP_GT_F16::~Inst_VOP3__V_CMP_GT_F16() - { - } // ~Inst_VOP3__V_CMP_GT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_LG_F16 class methods --- - - Inst_VOP3__V_CMP_LG_F16::Inst_VOP3__V_CMP_LG_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lg_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_LG_F16 - - Inst_VOP3__V_CMP_LG_F16::~Inst_VOP3__V_CMP_LG_F16() - { - } // ~Inst_VOP3__V_CMP_LG_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_GE_F16 class methods --- - - Inst_VOP3__V_CMP_GE_F16::Inst_VOP3__V_CMP_GE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_GE_F16 - - Inst_VOP3__V_CMP_GE_F16::~Inst_VOP3__V_CMP_GE_F16() - { - } // ~Inst_VOP3__V_CMP_GE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_O_F16 class methods --- - - Inst_VOP3__V_CMP_O_F16::Inst_VOP3__V_CMP_O_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_o_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_O_F16 - - Inst_VOP3__V_CMP_O_F16::~Inst_VOP3__V_CMP_O_F16() - { - } // ~Inst_VOP3__V_CMP_O_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_U_F16 class methods --- - - Inst_VOP3__V_CMP_U_F16::Inst_VOP3__V_CMP_U_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_u_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_U_F16 - - Inst_VOP3__V_CMP_U_F16::~Inst_VOP3__V_CMP_U_F16() - { - } // ~Inst_VOP3__V_CMP_U_F16 - - // --- description from .arch file --- - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_NGE_F16 class methods --- - - Inst_VOP3__V_CMP_NGE_F16::Inst_VOP3__V_CMP_NGE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nge_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NGE_F16 - - Inst_VOP3__V_CMP_NGE_F16::~Inst_VOP3__V_CMP_NGE_F16() - { - } // ~Inst_VOP3__V_CMP_NGE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_NLG_F16 class methods --- - - Inst_VOP3__V_CMP_NLG_F16::Inst_VOP3__V_CMP_NLG_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nlg_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NLG_F16 - - Inst_VOP3__V_CMP_NLG_F16::~Inst_VOP3__V_CMP_NLG_F16() - { - } // ~Inst_VOP3__V_CMP_NLG_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_NGT_F16 class methods --- - - Inst_VOP3__V_CMP_NGT_F16::Inst_VOP3__V_CMP_NGT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ngt_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NGT_F16 - - Inst_VOP3__V_CMP_NGT_F16::~Inst_VOP3__V_CMP_NGT_F16() - { - } // ~Inst_VOP3__V_CMP_NGT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_NLE_F16 class methods --- - - Inst_VOP3__V_CMP_NLE_F16::Inst_VOP3__V_CMP_NLE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nle_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NLE_F16 - - Inst_VOP3__V_CMP_NLE_F16::~Inst_VOP3__V_CMP_NLE_F16() - { - } // ~Inst_VOP3__V_CMP_NLE_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_NEQ_F16 class methods --- - - Inst_VOP3__V_CMP_NEQ_F16::Inst_VOP3__V_CMP_NEQ_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_neq_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NEQ_F16 - - Inst_VOP3__V_CMP_NEQ_F16::~Inst_VOP3__V_CMP_NEQ_F16() - { - } // ~Inst_VOP3__V_CMP_NEQ_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_NLT_F16 class methods --- - - Inst_VOP3__V_CMP_NLT_F16::Inst_VOP3__V_CMP_NLT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nlt_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_NLT_F16 - - Inst_VOP3__V_CMP_NLT_F16::~Inst_VOP3__V_CMP_NLT_F16() - { - } // ~Inst_VOP3__V_CMP_NLT_F16 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMP_TRU_F16 class methods --- - - Inst_VOP3__V_CMP_TRU_F16::Inst_VOP3__V_CMP_TRU_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_tru_f16", true) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CMP_TRU_F16 - - Inst_VOP3__V_CMP_TRU_F16::~Inst_VOP3__V_CMP_TRU_F16() - { - } // ~Inst_VOP3__V_CMP_TRU_F16 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_F16 class methods --- - - Inst_VOP3__V_CMPX_F_F16::Inst_VOP3__V_CMPX_F_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_f16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_F16 - - Inst_VOP3__V_CMPX_F_F16::~Inst_VOP3__V_CMPX_F_F16() - { - } // ~Inst_VOP3__V_CMPX_F_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_F16 class methods --- - - Inst_VOP3__V_CMPX_LT_F16::Inst_VOP3__V_CMPX_LT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_F16 - - Inst_VOP3__V_CMPX_LT_F16::~Inst_VOP3__V_CMPX_LT_F16() - { - } // ~Inst_VOP3__V_CMPX_LT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_F16 class methods --- - - Inst_VOP3__V_CMPX_EQ_F16::Inst_VOP3__V_CMPX_EQ_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_F16 - - Inst_VOP3__V_CMPX_EQ_F16::~Inst_VOP3__V_CMPX_EQ_F16() - { - } // ~Inst_VOP3__V_CMPX_EQ_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_F16 class methods --- - - Inst_VOP3__V_CMPX_LE_F16::Inst_VOP3__V_CMPX_LE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_F16 - - Inst_VOP3__V_CMPX_LE_F16::~Inst_VOP3__V_CMPX_LE_F16() - { - } // ~Inst_VOP3__V_CMPX_LE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_F16 class methods --- - - Inst_VOP3__V_CMPX_GT_F16::Inst_VOP3__V_CMPX_GT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_F16 - - Inst_VOP3__V_CMPX_GT_F16::~Inst_VOP3__V_CMPX_GT_F16() - { - } // ~Inst_VOP3__V_CMPX_GT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_LG_F16 class methods --- - - Inst_VOP3__V_CMPX_LG_F16::Inst_VOP3__V_CMPX_LG_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lg_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LG_F16 - - Inst_VOP3__V_CMPX_LG_F16::~Inst_VOP3__V_CMPX_LG_F16() - { - } // ~Inst_VOP3__V_CMPX_LG_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_F16 class methods --- - - Inst_VOP3__V_CMPX_GE_F16::Inst_VOP3__V_CMPX_GE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_F16 - - Inst_VOP3__V_CMPX_GE_F16::~Inst_VOP3__V_CMPX_GE_F16() - { - } // ~Inst_VOP3__V_CMPX_GE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_O_F16 class methods --- - - Inst_VOP3__V_CMPX_O_F16::Inst_VOP3__V_CMPX_O_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_o_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_O_F16 - - Inst_VOP3__V_CMPX_O_F16::~Inst_VOP3__V_CMPX_O_F16() - { - } // ~Inst_VOP3__V_CMPX_O_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. 
- void - Inst_VOP3__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_U_F16 class methods --- - - Inst_VOP3__V_CMPX_U_F16::Inst_VOP3__V_CMPX_U_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_u_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_U_F16 - - Inst_VOP3__V_CMPX_U_F16::~Inst_VOP3__V_CMPX_U_F16() - { - } // ~Inst_VOP3__V_CMPX_U_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOP3__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_NGE_F16 class methods --- - - Inst_VOP3__V_CMPX_NGE_F16::Inst_VOP3__V_CMPX_NGE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nge_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NGE_F16 - - Inst_VOP3__V_CMPX_NGE_F16::~Inst_VOP3__V_CMPX_NGE_F16() - { - } // ~Inst_VOP3__V_CMPX_NGE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_NLG_F16 class methods --- - - Inst_VOP3__V_CMPX_NLG_F16::Inst_VOP3__V_CMPX_NLG_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nlg_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLG_F16 - - Inst_VOP3__V_CMPX_NLG_F16::~Inst_VOP3__V_CMPX_NLG_F16() - { - } // ~Inst_VOP3__V_CMPX_NLG_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_NGT_F16 class methods --- - - Inst_VOP3__V_CMPX_NGT_F16::Inst_VOP3__V_CMPX_NGT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ngt_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NGT_F16 - - Inst_VOP3__V_CMPX_NGT_F16::~Inst_VOP3__V_CMPX_NGT_F16() - { - } // ~Inst_VOP3__V_CMPX_NGT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_NLE_F16 class methods --- - - Inst_VOP3__V_CMPX_NLE_F16::Inst_VOP3__V_CMPX_NLE_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nle_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLE_F16 - - Inst_VOP3__V_CMPX_NLE_F16::~Inst_VOP3__V_CMPX_NLE_F16() - { - } // ~Inst_VOP3__V_CMPX_NLE_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_NEQ_F16 class methods --- - - Inst_VOP3__V_CMPX_NEQ_F16::Inst_VOP3__V_CMPX_NEQ_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_neq_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NEQ_F16 - - Inst_VOP3__V_CMPX_NEQ_F16::~Inst_VOP3__V_CMPX_NEQ_F16() - { - } // ~Inst_VOP3__V_CMPX_NEQ_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_NLT_F16 class methods --- - - Inst_VOP3__V_CMPX_NLT_F16::Inst_VOP3__V_CMPX_NLT_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nlt_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLT_F16 - - Inst_VOP3__V_CMPX_NLT_F16::~Inst_VOP3__V_CMPX_NLT_F16() - { - } // ~Inst_VOP3__V_CMPX_NLT_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CMPX_TRU_F16 class methods --- - - Inst_VOP3__V_CMPX_TRU_F16::Inst_VOP3__V_CMPX_TRU_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_tru_f16", true) - { - setFlag(ALU); - setFlag(F16); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_TRU_F16 - - Inst_VOP3__V_CMPX_TRU_F16::~Inst_VOP3__V_CMPX_TRU_F16() - { - } // ~Inst_VOP3__V_CMPX_TRU_F16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_F32 class methods --- - - Inst_VOP3__V_CMP_F_F32::Inst_VOP3__V_CMP_F_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_F_F32 - - Inst_VOP3__V_CMP_F_F32::~Inst_VOP3__V_CMP_F_F32() - { - } // ~Inst_VOP3__V_CMP_F_F32 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_F32 class methods --- - - Inst_VOP3__V_CMP_LT_F32::Inst_VOP3__V_CMP_LT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_LT_F32 - - Inst_VOP3__V_CMP_LT_F32::~Inst_VOP3__V_CMP_LT_F32() - { - } // ~Inst_VOP3__V_CMP_LT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_F32 class methods --- - - Inst_VOP3__V_CMP_EQ_F32::Inst_VOP3__V_CMP_EQ_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_EQ_F32 - - Inst_VOP3__V_CMP_EQ_F32::~Inst_VOP3__V_CMP_EQ_F32() - { - } // ~Inst_VOP3__V_CMP_EQ_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_F32 class methods --- - - Inst_VOP3__V_CMP_LE_F32::Inst_VOP3__V_CMP_LE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_LE_F32 - - Inst_VOP3__V_CMP_LE_F32::~Inst_VOP3__V_CMP_LE_F32() - { - } // ~Inst_VOP3__V_CMP_LE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_F32 class methods --- - - Inst_VOP3__V_CMP_GT_F32::Inst_VOP3__V_CMP_GT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_GT_F32 - - Inst_VOP3__V_CMP_GT_F32::~Inst_VOP3__V_CMP_GT_F32() - { - } // ~Inst_VOP3__V_CMP_GT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LG_F32 class methods --- - - Inst_VOP3__V_CMP_LG_F32::Inst_VOP3__V_CMP_LG_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lg_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_LG_F32 - - Inst_VOP3__V_CMP_LG_F32::~Inst_VOP3__V_CMP_LG_F32() - { - } // ~Inst_VOP3__V_CMP_LG_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_F32 class methods --- - - Inst_VOP3__V_CMP_GE_F32::Inst_VOP3__V_CMP_GE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_GE_F32 - - Inst_VOP3__V_CMP_GE_F32::~Inst_VOP3__V_CMP_GE_F32() - { - } // ~Inst_VOP3__V_CMP_GE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_O_F32 class methods --- - - Inst_VOP3__V_CMP_O_F32::Inst_VOP3__V_CMP_O_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_o_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_O_F32 - - Inst_VOP3__V_CMP_O_F32::~Inst_VOP3__V_CMP_O_F32() - { - } // ~Inst_VOP3__V_CMP_O_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_U_F32 class methods --- - - Inst_VOP3__V_CMP_U_F32::Inst_VOP3__V_CMP_U_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_u_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_U_F32 - - Inst_VOP3__V_CMP_U_F32::~Inst_VOP3__V_CMP_U_F32() - { - } // ~Inst_VOP3__V_CMP_U_F32 - - // --- description from .arch file --- - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NGE_F32 class methods --- - - Inst_VOP3__V_CMP_NGE_F32::Inst_VOP3__V_CMP_NGE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nge_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NGE_F32 - - Inst_VOP3__V_CMP_NGE_F32::~Inst_VOP3__V_CMP_NGE_F32() - { - } // ~Inst_VOP3__V_CMP_NGE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NLG_F32 class methods --- - - Inst_VOP3__V_CMP_NLG_F32::Inst_VOP3__V_CMP_NLG_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nlg_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NLG_F32 - - Inst_VOP3__V_CMP_NLG_F32::~Inst_VOP3__V_CMP_NLG_F32() - { - } // ~Inst_VOP3__V_CMP_NLG_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NGT_F32 class methods --- - - Inst_VOP3__V_CMP_NGT_F32::Inst_VOP3__V_CMP_NGT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ngt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NGT_F32 - - Inst_VOP3__V_CMP_NGT_F32::~Inst_VOP3__V_CMP_NGT_F32() - { - } // ~Inst_VOP3__V_CMP_NGT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NLE_F32 class methods --- - - Inst_VOP3__V_CMP_NLE_F32::Inst_VOP3__V_CMP_NLE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nle_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NLE_F32 - - Inst_VOP3__V_CMP_NLE_F32::~Inst_VOP3__V_CMP_NLE_F32() - { - } // ~Inst_VOP3__V_CMP_NLE_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NEQ_F32 class methods --- - - Inst_VOP3__V_CMP_NEQ_F32::Inst_VOP3__V_CMP_NEQ_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_neq_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NEQ_F32 - - Inst_VOP3__V_CMP_NEQ_F32::~Inst_VOP3__V_CMP_NEQ_F32() - { - } // ~Inst_VOP3__V_CMP_NEQ_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NLT_F32 class methods --- - - Inst_VOP3__V_CMP_NLT_F32::Inst_VOP3__V_CMP_NLT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nlt_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_NLT_F32 - - Inst_VOP3__V_CMP_NLT_F32::~Inst_VOP3__V_CMP_NLT_F32() - { - } // ~Inst_VOP3__V_CMP_NLT_F32 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_TRU_F32 class methods --- - - Inst_VOP3__V_CMP_TRU_F32::Inst_VOP3__V_CMP_TRU_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_tru_f32", true) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CMP_TRU_F32 - - Inst_VOP3__V_CMP_TRU_F32::~Inst_VOP3__V_CMP_TRU_F32() - { - } // ~Inst_VOP3__V_CMP_TRU_F32 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_F32 class methods --- - - Inst_VOP3__V_CMPX_F_F32::Inst_VOP3__V_CMPX_F_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_F32 - - Inst_VOP3__V_CMPX_F_F32::~Inst_VOP3__V_CMPX_F_F32() - { - } // ~Inst_VOP3__V_CMPX_F_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_F32 class methods --- - - Inst_VOP3__V_CMPX_LT_F32::Inst_VOP3__V_CMPX_LT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_F32 - - Inst_VOP3__V_CMPX_LT_F32::~Inst_VOP3__V_CMPX_LT_F32() - { - } // ~Inst_VOP3__V_CMPX_LT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_F32 class methods --- - - Inst_VOP3__V_CMPX_EQ_F32::Inst_VOP3__V_CMPX_EQ_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_F32 - - Inst_VOP3__V_CMPX_EQ_F32::~Inst_VOP3__V_CMPX_EQ_F32() - { - } // ~Inst_VOP3__V_CMPX_EQ_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_F32 class methods --- - - Inst_VOP3__V_CMPX_LE_F32::Inst_VOP3__V_CMPX_LE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_F32 - - Inst_VOP3__V_CMPX_LE_F32::~Inst_VOP3__V_CMPX_LE_F32() - { - } // ~Inst_VOP3__V_CMPX_LE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_F32 class methods --- - - Inst_VOP3__V_CMPX_GT_F32::Inst_VOP3__V_CMPX_GT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_F32 - - Inst_VOP3__V_CMPX_GT_F32::~Inst_VOP3__V_CMPX_GT_F32() - { - } // ~Inst_VOP3__V_CMPX_GT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LG_F32 class methods --- - - Inst_VOP3__V_CMPX_LG_F32::Inst_VOP3__V_CMPX_LG_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lg_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LG_F32 - - Inst_VOP3__V_CMPX_LG_F32::~Inst_VOP3__V_CMPX_LG_F32() - { - } // ~Inst_VOP3__V_CMPX_LG_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_F32 class methods --- - - Inst_VOP3__V_CMPX_GE_F32::Inst_VOP3__V_CMPX_GE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_F32 - - Inst_VOP3__V_CMPX_GE_F32::~Inst_VOP3__V_CMPX_GE_F32() - { - } // ~Inst_VOP3__V_CMPX_GE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_O_F32 class methods --- - - Inst_VOP3__V_CMPX_O_F32::Inst_VOP3__V_CMPX_O_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_o_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_O_F32 - - Inst_VOP3__V_CMPX_O_F32::~Inst_VOP3__V_CMPX_O_F32() - { - } // ~Inst_VOP3__V_CMPX_O_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. 
- void - Inst_VOP3__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_U_F32 class methods --- - - Inst_VOP3__V_CMPX_U_F32::Inst_VOP3__V_CMPX_U_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_u_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_U_F32 - - Inst_VOP3__V_CMPX_U_F32::~Inst_VOP3__V_CMPX_U_F32() - { - } // ~Inst_VOP3__V_CMPX_U_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. - void - Inst_VOP3__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NGE_F32 class methods --- - - Inst_VOP3__V_CMPX_NGE_F32::Inst_VOP3__V_CMPX_NGE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nge_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NGE_F32 - - Inst_VOP3__V_CMPX_NGE_F32::~Inst_VOP3__V_CMPX_NGE_F32() - { - } // ~Inst_VOP3__V_CMPX_NGE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NLG_F32 class methods --- - - Inst_VOP3__V_CMPX_NLG_F32::Inst_VOP3__V_CMPX_NLG_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nlg_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLG_F32 - - Inst_VOP3__V_CMPX_NLG_F32::~Inst_VOP3__V_CMPX_NLG_F32() - { - } // ~Inst_VOP3__V_CMPX_NLG_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NGT_F32 class methods --- - - Inst_VOP3__V_CMPX_NGT_F32::Inst_VOP3__V_CMPX_NGT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ngt_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NGT_F32 - - Inst_VOP3__V_CMPX_NGT_F32::~Inst_VOP3__V_CMPX_NGT_F32() - { - } // ~Inst_VOP3__V_CMPX_NGT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NLE_F32 class methods --- - - Inst_VOP3__V_CMPX_NLE_F32::Inst_VOP3__V_CMPX_NLE_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nle_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLE_F32 - - Inst_VOP3__V_CMPX_NLE_F32::~Inst_VOP3__V_CMPX_NLE_F32() - { - } // ~Inst_VOP3__V_CMPX_NLE_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NEQ_F32 class methods --- - - Inst_VOP3__V_CMPX_NEQ_F32::Inst_VOP3__V_CMPX_NEQ_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_neq_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NEQ_F32 - - Inst_VOP3__V_CMPX_NEQ_F32::~Inst_VOP3__V_CMPX_NEQ_F32() - { - } // ~Inst_VOP3__V_CMPX_NEQ_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NLT_F32 class methods --- - - Inst_VOP3__V_CMPX_NLT_F32::Inst_VOP3__V_CMPX_NLT_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nlt_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLT_F32 - - Inst_VOP3__V_CMPX_NLT_F32::~Inst_VOP3__V_CMPX_NLT_F32() - { - } // ~Inst_VOP3__V_CMPX_NLT_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_TRU_F32 class methods --- - - Inst_VOP3__V_CMPX_TRU_F32::Inst_VOP3__V_CMPX_TRU_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_tru_f32", true) - { - setFlag(ALU); - setFlag(F32); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_TRU_F32 - - Inst_VOP3__V_CMPX_TRU_F32::~Inst_VOP3__V_CMPX_TRU_F32() - { - } // ~Inst_VOP3__V_CMPX_TRU_F32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_F64 class methods --- - - Inst_VOP3__V_CMP_F_F64::Inst_VOP3__V_CMP_F_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_F_F64 - - Inst_VOP3__V_CMP_F_F64::~Inst_VOP3__V_CMP_F_F64() - { - } // ~Inst_VOP3__V_CMP_F_F64 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_F64 class methods --- - - Inst_VOP3__V_CMP_LT_F64::Inst_VOP3__V_CMP_LT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_LT_F64 - - Inst_VOP3__V_CMP_LT_F64::~Inst_VOP3__V_CMP_LT_F64() - { - } // ~Inst_VOP3__V_CMP_LT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_F64 class methods --- - - Inst_VOP3__V_CMP_EQ_F64::Inst_VOP3__V_CMP_EQ_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_EQ_F64 - - Inst_VOP3__V_CMP_EQ_F64::~Inst_VOP3__V_CMP_EQ_F64() - { - } // ~Inst_VOP3__V_CMP_EQ_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_F64 class methods --- - - Inst_VOP3__V_CMP_LE_F64::Inst_VOP3__V_CMP_LE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_LE_F64 - - Inst_VOP3__V_CMP_LE_F64::~Inst_VOP3__V_CMP_LE_F64() - { - } // ~Inst_VOP3__V_CMP_LE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_F64 class methods --- - - Inst_VOP3__V_CMP_GT_F64::Inst_VOP3__V_CMP_GT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_GT_F64 - - Inst_VOP3__V_CMP_GT_F64::~Inst_VOP3__V_CMP_GT_F64() - { - } // ~Inst_VOP3__V_CMP_GT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LG_F64 class methods --- - - Inst_VOP3__V_CMP_LG_F64::Inst_VOP3__V_CMP_LG_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lg_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_LG_F64 - - Inst_VOP3__V_CMP_LG_F64::~Inst_VOP3__V_CMP_LG_F64() - { - } // ~Inst_VOP3__V_CMP_LG_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_F64 class methods --- - - Inst_VOP3__V_CMP_GE_F64::Inst_VOP3__V_CMP_GE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_GE_F64 - - Inst_VOP3__V_CMP_GE_F64::~Inst_VOP3__V_CMP_GE_F64() - { - } // ~Inst_VOP3__V_CMP_GE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_O_F64 class methods --- - - Inst_VOP3__V_CMP_O_F64::Inst_VOP3__V_CMP_O_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_o_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_O_F64 - - Inst_VOP3__V_CMP_O_F64::~Inst_VOP3__V_CMP_O_F64() - { - } // ~Inst_VOP3__V_CMP_O_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_U_F64 class methods --- - - Inst_VOP3__V_CMP_U_F64::Inst_VOP3__V_CMP_U_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_u_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_U_F64 - - Inst_VOP3__V_CMP_U_F64::~Inst_VOP3__V_CMP_U_F64() - { - } // ~Inst_VOP3__V_CMP_U_F64 - - // --- description from .arch file --- - // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NGE_F64 class methods --- - - Inst_VOP3__V_CMP_NGE_F64::Inst_VOP3__V_CMP_NGE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nge_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NGE_F64 - - Inst_VOP3__V_CMP_NGE_F64::~Inst_VOP3__V_CMP_NGE_F64() - { - } // ~Inst_VOP3__V_CMP_NGE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NLG_F64 class methods --- - - Inst_VOP3__V_CMP_NLG_F64::Inst_VOP3__V_CMP_NLG_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nlg_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NLG_F64 - - Inst_VOP3__V_CMP_NLG_F64::~Inst_VOP3__V_CMP_NLG_F64() - { - } // ~Inst_VOP3__V_CMP_NLG_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NGT_F64 class methods --- - - Inst_VOP3__V_CMP_NGT_F64::Inst_VOP3__V_CMP_NGT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ngt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NGT_F64 - - Inst_VOP3__V_CMP_NGT_F64::~Inst_VOP3__V_CMP_NGT_F64() - { - } // ~Inst_VOP3__V_CMP_NGT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NLE_F64 class methods --- - - Inst_VOP3__V_CMP_NLE_F64::Inst_VOP3__V_CMP_NLE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nle_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NLE_F64 - - Inst_VOP3__V_CMP_NLE_F64::~Inst_VOP3__V_CMP_NLE_F64() - { - } // ~Inst_VOP3__V_CMP_NLE_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NEQ_F64 class methods --- - - Inst_VOP3__V_CMP_NEQ_F64::Inst_VOP3__V_CMP_NEQ_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_neq_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NEQ_F64 - - Inst_VOP3__V_CMP_NEQ_F64::~Inst_VOP3__V_CMP_NEQ_F64() - { - } // ~Inst_VOP3__V_CMP_NEQ_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NLT_F64 class methods --- - - Inst_VOP3__V_CMP_NLT_F64::Inst_VOP3__V_CMP_NLT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_nlt_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_NLT_F64 - - Inst_VOP3__V_CMP_NLT_F64::~Inst_VOP3__V_CMP_NLT_F64() - { - } // ~Inst_VOP3__V_CMP_NLT_F64 - - // --- description from .arch file --- - // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_TRU_F64 class methods --- - - Inst_VOP3__V_CMP_TRU_F64::Inst_VOP3__V_CMP_TRU_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_tru_f64", true) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CMP_TRU_F64 - - Inst_VOP3__V_CMP_TRU_F64::~Inst_VOP3__V_CMP_TRU_F64() - { - } // ~Inst_VOP3__V_CMP_TRU_F64 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_F64 class methods --- - - Inst_VOP3__V_CMPX_F_F64::Inst_VOP3__V_CMPX_F_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_F64 - - Inst_VOP3__V_CMPX_F_F64::~Inst_VOP3__V_CMPX_F_F64() - { - } // ~Inst_VOP3__V_CMPX_F_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_F64 class methods --- - - Inst_VOP3__V_CMPX_LT_F64::Inst_VOP3__V_CMPX_LT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_F64 - - Inst_VOP3__V_CMPX_LT_F64::~Inst_VOP3__V_CMPX_LT_F64() - { - } // ~Inst_VOP3__V_CMPX_LT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_F64 class methods --- - - Inst_VOP3__V_CMPX_EQ_F64::Inst_VOP3__V_CMPX_EQ_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_F64 - - Inst_VOP3__V_CMPX_EQ_F64::~Inst_VOP3__V_CMPX_EQ_F64() - { - } // ~Inst_VOP3__V_CMPX_EQ_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_F64 class methods --- - - Inst_VOP3__V_CMPX_LE_F64::Inst_VOP3__V_CMPX_LE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_F64 - - Inst_VOP3__V_CMPX_LE_F64::~Inst_VOP3__V_CMPX_LE_F64() - { - } // ~Inst_VOP3__V_CMPX_LE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_F64 class methods --- - - Inst_VOP3__V_CMPX_GT_F64::Inst_VOP3__V_CMPX_GT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_F64 - - Inst_VOP3__V_CMPX_GT_F64::~Inst_VOP3__V_CMPX_GT_F64() - { - } // ~Inst_VOP3__V_CMPX_GT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LG_F64 class methods --- - - Inst_VOP3__V_CMPX_LG_F64::Inst_VOP3__V_CMPX_LG_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lg_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LG_F64 - - Inst_VOP3__V_CMPX_LG_F64::~Inst_VOP3__V_CMPX_LG_F64() - { - } // ~Inst_VOP3__V_CMPX_LG_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_F64 class methods --- - - Inst_VOP3__V_CMPX_GE_F64::Inst_VOP3__V_CMPX_GE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_F64 - - Inst_VOP3__V_CMPX_GE_F64::~Inst_VOP3__V_CMPX_GE_F64() - { - } // ~Inst_VOP3__V_CMPX_GE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_O_F64 class methods --- - - Inst_VOP3__V_CMPX_O_F64::Inst_VOP3__V_CMPX_O_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_o_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_O_F64 - - Inst_VOP3__V_CMPX_O_F64::~Inst_VOP3__V_CMPX_O_F64() - { - } // ~Inst_VOP3__V_CMPX_O_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC - // encoding. 
- void - Inst_VOP3__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (!std::isnan(src0[lane]) - && !std::isnan(src1[lane])) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_U_F64 class methods --- - - Inst_VOP3__V_CMPX_U_F64::Inst_VOP3__V_CMPX_U_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_u_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_U_F64 - - Inst_VOP3__V_CMPX_U_F64::~Inst_VOP3__V_CMPX_U_F64() - { - } // ~Inst_VOP3__V_CMPX_U_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC - // encoding. 
- void - Inst_VOP3__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, (std::isnan(src0[lane]) - || std::isnan(src1[lane])) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NGE_F64 class methods --- - - Inst_VOP3__V_CMPX_NGE_F64::Inst_VOP3__V_CMPX_NGE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nge_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NGE_F64 - - Inst_VOP3__V_CMPX_NGE_F64::~Inst_VOP3__V_CMPX_NGE_F64() - { - } // ~Inst_VOP3__V_CMPX_NGE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NLG_F64 class methods --- - - Inst_VOP3__V_CMPX_NLG_F64::Inst_VOP3__V_CMPX_NLG_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nlg_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLG_F64 - - Inst_VOP3__V_CMPX_NLG_F64::~Inst_VOP3__V_CMPX_NLG_F64() - { - } // ~Inst_VOP3__V_CMPX_NLG_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane] - || src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NGT_F64 class methods --- - - Inst_VOP3__V_CMPX_NGT_F64::Inst_VOP3__V_CMPX_NGT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ngt_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NGT_F64 - - Inst_VOP3__V_CMPX_NGT_F64::~Inst_VOP3__V_CMPX_NGT_F64() - { - } // ~Inst_VOP3__V_CMPX_NGT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NLE_F64 class methods --- - - Inst_VOP3__V_CMPX_NLE_F64::Inst_VOP3__V_CMPX_NLE_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nle_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLE_F64 - - Inst_VOP3__V_CMPX_NLE_F64::~Inst_VOP3__V_CMPX_NLE_F64() - { - } // ~Inst_VOP3__V_CMPX_NLE_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NEQ_F64 class methods --- - - Inst_VOP3__V_CMPX_NEQ_F64::Inst_VOP3__V_CMPX_NEQ_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_neq_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NEQ_F64 - - Inst_VOP3__V_CMPX_NEQ_F64::~Inst_VOP3__V_CMPX_NEQ_F64() - { - } // ~Inst_VOP3__V_CMPX_NEQ_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NLT_F64 class methods --- - - Inst_VOP3__V_CMPX_NLT_F64::Inst_VOP3__V_CMPX_NLT_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_nlt_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NLT_F64 - - Inst_VOP3__V_CMPX_NLT_F64::~Inst_VOP3__V_CMPX_NLT_F64() - { - } // ~Inst_VOP3__V_CMPX_NLT_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_TRU_F64 class methods --- - - Inst_VOP3__V_CMPX_TRU_F64::Inst_VOP3__V_CMPX_TRU_F64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_tru_f64", true) - { - setFlag(ALU); - setFlag(F64); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_TRU_F64 - - Inst_VOP3__V_CMPX_TRU_F64::~Inst_VOP3__V_CMPX_TRU_F64() - { - } // ~Inst_VOP3__V_CMPX_TRU_F64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_I16 class methods --- - - Inst_VOP3__V_CMP_F_I16::Inst_VOP3__V_CMP_F_I16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_I16 - - Inst_VOP3__V_CMP_F_I16::~Inst_VOP3__V_CMP_F_I16() - { - } // ~Inst_VOP3__V_CMP_F_I16 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_I16 class methods --- - - Inst_VOP3__V_CMP_LT_I16::Inst_VOP3__V_CMP_LT_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_I16 - - Inst_VOP3__V_CMP_LT_I16::~Inst_VOP3__V_CMP_LT_I16() - { - } // ~Inst_VOP3__V_CMP_LT_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_I16 class methods --- - - Inst_VOP3__V_CMP_EQ_I16::Inst_VOP3__V_CMP_EQ_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_I16 - - Inst_VOP3__V_CMP_EQ_I16::~Inst_VOP3__V_CMP_EQ_I16() - { - } // ~Inst_VOP3__V_CMP_EQ_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_I16 class methods --- - - Inst_VOP3__V_CMP_LE_I16::Inst_VOP3__V_CMP_LE_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_I16 - - Inst_VOP3__V_CMP_LE_I16::~Inst_VOP3__V_CMP_LE_I16() - { - } // ~Inst_VOP3__V_CMP_LE_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_I16 class methods --- - - Inst_VOP3__V_CMP_GT_I16::Inst_VOP3__V_CMP_GT_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_I16 - - Inst_VOP3__V_CMP_GT_I16::~Inst_VOP3__V_CMP_GT_I16() - { - } // ~Inst_VOP3__V_CMP_GT_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NE_I16 class methods --- - - Inst_VOP3__V_CMP_NE_I16::Inst_VOP3__V_CMP_NE_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ne_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_I16 - - Inst_VOP3__V_CMP_NE_I16::~Inst_VOP3__V_CMP_NE_I16() - { - } // ~Inst_VOP3__V_CMP_NE_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_I16 class methods --- - - Inst_VOP3__V_CMP_GE_I16::Inst_VOP3__V_CMP_GE_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_I16 - - Inst_VOP3__V_CMP_GE_I16::~Inst_VOP3__V_CMP_GE_I16() - { - } // ~Inst_VOP3__V_CMP_GE_I16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_T_I16 class methods --- - - Inst_VOP3__V_CMP_T_I16::Inst_VOP3__V_CMP_T_I16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_t_i16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_I16 - - Inst_VOP3__V_CMP_T_I16::~Inst_VOP3__V_CMP_T_I16() - { - } // ~Inst_VOP3__V_CMP_T_I16 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_U16 class methods --- - - Inst_VOP3__V_CMP_F_U16::Inst_VOP3__V_CMP_F_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_U16 - - Inst_VOP3__V_CMP_F_U16::~Inst_VOP3__V_CMP_F_U16() - { - } // ~Inst_VOP3__V_CMP_F_U16 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_U16 class methods --- - - Inst_VOP3__V_CMP_LT_U16::Inst_VOP3__V_CMP_LT_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_U16 - - Inst_VOP3__V_CMP_LT_U16::~Inst_VOP3__V_CMP_LT_U16() - { - } // ~Inst_VOP3__V_CMP_LT_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_U16 class methods --- - - Inst_VOP3__V_CMP_EQ_U16::Inst_VOP3__V_CMP_EQ_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_U16 - - Inst_VOP3__V_CMP_EQ_U16::~Inst_VOP3__V_CMP_EQ_U16() - { - } // ~Inst_VOP3__V_CMP_EQ_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_U16 class methods --- - - Inst_VOP3__V_CMP_LE_U16::Inst_VOP3__V_CMP_LE_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_U16 - - Inst_VOP3__V_CMP_LE_U16::~Inst_VOP3__V_CMP_LE_U16() - { - } // ~Inst_VOP3__V_CMP_LE_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_U16 class methods --- - - Inst_VOP3__V_CMP_GT_U16::Inst_VOP3__V_CMP_GT_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_U16 - - Inst_VOP3__V_CMP_GT_U16::~Inst_VOP3__V_CMP_GT_U16() - { - } // ~Inst_VOP3__V_CMP_GT_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NE_U16 class methods --- - - Inst_VOP3__V_CMP_NE_U16::Inst_VOP3__V_CMP_NE_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ne_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_U16 - - Inst_VOP3__V_CMP_NE_U16::~Inst_VOP3__V_CMP_NE_U16() - { - } // ~Inst_VOP3__V_CMP_NE_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_U16 class methods --- - - Inst_VOP3__V_CMP_GE_U16::Inst_VOP3__V_CMP_GE_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_U16 - - Inst_VOP3__V_CMP_GE_U16::~Inst_VOP3__V_CMP_GE_U16() - { - } // ~Inst_VOP3__V_CMP_GE_U16 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_T_U16 class methods --- - - Inst_VOP3__V_CMP_T_U16::Inst_VOP3__V_CMP_T_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_t_u16", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_U16 - - Inst_VOP3__V_CMP_T_U16::~Inst_VOP3__V_CMP_T_U16() - { - } // ~Inst_VOP3__V_CMP_T_U16 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_I16 class methods --- - - Inst_VOP3__V_CMPX_F_I16::Inst_VOP3__V_CMPX_F_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_I16 - - Inst_VOP3__V_CMPX_F_I16::~Inst_VOP3__V_CMPX_F_I16() - { - } // ~Inst_VOP3__V_CMPX_F_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_I16 class methods --- - - Inst_VOP3__V_CMPX_LT_I16::Inst_VOP3__V_CMPX_LT_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_I16 - - Inst_VOP3__V_CMPX_LT_I16::~Inst_VOP3__V_CMPX_LT_I16() - { - } // ~Inst_VOP3__V_CMPX_LT_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_I16 class methods --- - - Inst_VOP3__V_CMPX_EQ_I16::Inst_VOP3__V_CMPX_EQ_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_I16 - - Inst_VOP3__V_CMPX_EQ_I16::~Inst_VOP3__V_CMPX_EQ_I16() - { - } // ~Inst_VOP3__V_CMPX_EQ_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_I16 class methods --- - - Inst_VOP3__V_CMPX_LE_I16::Inst_VOP3__V_CMPX_LE_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_I16 - - Inst_VOP3__V_CMPX_LE_I16::~Inst_VOP3__V_CMPX_LE_I16() - { - } // ~Inst_VOP3__V_CMPX_LE_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_I16 class methods --- - - Inst_VOP3__V_CMPX_GT_I16::Inst_VOP3__V_CMPX_GT_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_I16 - - Inst_VOP3__V_CMPX_GT_I16::~Inst_VOP3__V_CMPX_GT_I16() - { - } // ~Inst_VOP3__V_CMPX_GT_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NE_I16 class methods --- - - Inst_VOP3__V_CMPX_NE_I16::Inst_VOP3__V_CMPX_NE_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ne_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NE_I16 - - Inst_VOP3__V_CMPX_NE_I16::~Inst_VOP3__V_CMPX_NE_I16() - { - } // ~Inst_VOP3__V_CMPX_NE_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_I16 class methods --- - - Inst_VOP3__V_CMPX_GE_I16::Inst_VOP3__V_CMPX_GE_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_I16 - - Inst_VOP3__V_CMPX_GE_I16::~Inst_VOP3__V_CMPX_GE_I16() - { - } // ~Inst_VOP3__V_CMPX_GE_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_T_I16 class methods --- - - Inst_VOP3__V_CMPX_T_I16::Inst_VOP3__V_CMPX_T_I16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_t_i16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_T_I16 - - Inst_VOP3__V_CMPX_T_I16::~Inst_VOP3__V_CMPX_T_I16() - { - } // ~Inst_VOP3__V_CMPX_T_I16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_U16 class methods --- - - Inst_VOP3__V_CMPX_F_U16::Inst_VOP3__V_CMPX_F_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_U16 - - Inst_VOP3__V_CMPX_F_U16::~Inst_VOP3__V_CMPX_F_U16() - { - } // ~Inst_VOP3__V_CMPX_F_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_U16 class methods --- - - Inst_VOP3__V_CMPX_LT_U16::Inst_VOP3__V_CMPX_LT_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_U16 - - Inst_VOP3__V_CMPX_LT_U16::~Inst_VOP3__V_CMPX_LT_U16() - { - } // ~Inst_VOP3__V_CMPX_LT_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_U16 class methods --- - - Inst_VOP3__V_CMPX_EQ_U16::Inst_VOP3__V_CMPX_EQ_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_U16 - - Inst_VOP3__V_CMPX_EQ_U16::~Inst_VOP3__V_CMPX_EQ_U16() - { - } // ~Inst_VOP3__V_CMPX_EQ_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_U16 class methods --- - - Inst_VOP3__V_CMPX_LE_U16::Inst_VOP3__V_CMPX_LE_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_U16 - - Inst_VOP3__V_CMPX_LE_U16::~Inst_VOP3__V_CMPX_LE_U16() - { - } // ~Inst_VOP3__V_CMPX_LE_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_U16 class methods --- - - Inst_VOP3__V_CMPX_GT_U16::Inst_VOP3__V_CMPX_GT_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_U16 - - Inst_VOP3__V_CMPX_GT_U16::~Inst_VOP3__V_CMPX_GT_U16() - { - } // ~Inst_VOP3__V_CMPX_GT_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NE_U16 class methods --- - - Inst_VOP3__V_CMPX_NE_U16::Inst_VOP3__V_CMPX_NE_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ne_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NE_U16 - - Inst_VOP3__V_CMPX_NE_U16::~Inst_VOP3__V_CMPX_NE_U16() - { - } // ~Inst_VOP3__V_CMPX_NE_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_U16 class methods --- - - Inst_VOP3__V_CMPX_GE_U16::Inst_VOP3__V_CMPX_GE_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_U16 - - Inst_VOP3__V_CMPX_GE_U16::~Inst_VOP3__V_CMPX_GE_U16() - { - } // ~Inst_VOP3__V_CMPX_GE_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_T_U16 class methods --- - - Inst_VOP3__V_CMPX_T_U16::Inst_VOP3__V_CMPX_T_U16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_t_u16", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_T_U16 - - Inst_VOP3__V_CMPX_T_U16::~Inst_VOP3__V_CMPX_T_U16() - { - } // ~Inst_VOP3__V_CMPX_T_U16 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_I32 class methods --- - - Inst_VOP3__V_CMP_F_I32::Inst_VOP3__V_CMP_F_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_I32 - - Inst_VOP3__V_CMP_F_I32::~Inst_VOP3__V_CMP_F_I32() - { - } // ~Inst_VOP3__V_CMP_F_I32 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_I32 class methods --- - - Inst_VOP3__V_CMP_LT_I32::Inst_VOP3__V_CMP_LT_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_I32 - - Inst_VOP3__V_CMP_LT_I32::~Inst_VOP3__V_CMP_LT_I32() - { - } // ~Inst_VOP3__V_CMP_LT_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_I32 class methods --- - - Inst_VOP3__V_CMP_EQ_I32::Inst_VOP3__V_CMP_EQ_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_I32 - - Inst_VOP3__V_CMP_EQ_I32::~Inst_VOP3__V_CMP_EQ_I32() - { - } // ~Inst_VOP3__V_CMP_EQ_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_I32 class methods --- - - Inst_VOP3__V_CMP_LE_I32::Inst_VOP3__V_CMP_LE_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_I32 - - Inst_VOP3__V_CMP_LE_I32::~Inst_VOP3__V_CMP_LE_I32() - { - } // ~Inst_VOP3__V_CMP_LE_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_I32 class methods --- - - Inst_VOP3__V_CMP_GT_I32::Inst_VOP3__V_CMP_GT_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_I32 - - Inst_VOP3__V_CMP_GT_I32::~Inst_VOP3__V_CMP_GT_I32() - { - } // ~Inst_VOP3__V_CMP_GT_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NE_I32 class methods --- - - Inst_VOP3__V_CMP_NE_I32::Inst_VOP3__V_CMP_NE_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ne_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_I32 - - Inst_VOP3__V_CMP_NE_I32::~Inst_VOP3__V_CMP_NE_I32() - { - } // ~Inst_VOP3__V_CMP_NE_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_I32 class methods --- - - Inst_VOP3__V_CMP_GE_I32::Inst_VOP3__V_CMP_GE_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_I32 - - Inst_VOP3__V_CMP_GE_I32::~Inst_VOP3__V_CMP_GE_I32() - { - } // ~Inst_VOP3__V_CMP_GE_I32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_T_I32 class methods --- - - Inst_VOP3__V_CMP_T_I32::Inst_VOP3__V_CMP_T_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_t_i32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_I32 - - Inst_VOP3__V_CMP_T_I32::~Inst_VOP3__V_CMP_T_I32() - { - } // ~Inst_VOP3__V_CMP_T_I32 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_U32 class methods --- - - Inst_VOP3__V_CMP_F_U32::Inst_VOP3__V_CMP_F_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_U32 - - Inst_VOP3__V_CMP_F_U32::~Inst_VOP3__V_CMP_F_U32() - { - } // ~Inst_VOP3__V_CMP_F_U32 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_U32 class methods --- - - Inst_VOP3__V_CMP_LT_U32::Inst_VOP3__V_CMP_LT_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_U32 - - Inst_VOP3__V_CMP_LT_U32::~Inst_VOP3__V_CMP_LT_U32() - { - } // ~Inst_VOP3__V_CMP_LT_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_U32 class methods --- - - Inst_VOP3__V_CMP_EQ_U32::Inst_VOP3__V_CMP_EQ_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_U32 - - Inst_VOP3__V_CMP_EQ_U32::~Inst_VOP3__V_CMP_EQ_U32() - { - } // ~Inst_VOP3__V_CMP_EQ_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_U32 class methods --- - - Inst_VOP3__V_CMP_LE_U32::Inst_VOP3__V_CMP_LE_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_U32 - - Inst_VOP3__V_CMP_LE_U32::~Inst_VOP3__V_CMP_LE_U32() - { - } // ~Inst_VOP3__V_CMP_LE_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_U32 class methods --- - - Inst_VOP3__V_CMP_GT_U32::Inst_VOP3__V_CMP_GT_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_U32 - - Inst_VOP3__V_CMP_GT_U32::~Inst_VOP3__V_CMP_GT_U32() - { - } // ~Inst_VOP3__V_CMP_GT_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NE_U32 class methods --- - - Inst_VOP3__V_CMP_NE_U32::Inst_VOP3__V_CMP_NE_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ne_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_U32 - - Inst_VOP3__V_CMP_NE_U32::~Inst_VOP3__V_CMP_NE_U32() - { - } // ~Inst_VOP3__V_CMP_NE_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_U32 class methods --- - - Inst_VOP3__V_CMP_GE_U32::Inst_VOP3__V_CMP_GE_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_U32 - - Inst_VOP3__V_CMP_GE_U32::~Inst_VOP3__V_CMP_GE_U32() - { - } // ~Inst_VOP3__V_CMP_GE_U32 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_T_U32 class methods --- - - Inst_VOP3__V_CMP_T_U32::Inst_VOP3__V_CMP_T_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_t_u32", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_U32 - - Inst_VOP3__V_CMP_T_U32::~Inst_VOP3__V_CMP_T_U32() - { - } // ~Inst_VOP3__V_CMP_T_U32 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_I32 class methods --- - - Inst_VOP3__V_CMPX_F_I32::Inst_VOP3__V_CMPX_F_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_I32 - - Inst_VOP3__V_CMPX_F_I32::~Inst_VOP3__V_CMPX_F_I32() - { - } // ~Inst_VOP3__V_CMPX_F_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_I32 class methods --- - - Inst_VOP3__V_CMPX_LT_I32::Inst_VOP3__V_CMPX_LT_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_I32 - - Inst_VOP3__V_CMPX_LT_I32::~Inst_VOP3__V_CMPX_LT_I32() - { - } // ~Inst_VOP3__V_CMPX_LT_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_I32 class methods --- - - Inst_VOP3__V_CMPX_EQ_I32::Inst_VOP3__V_CMPX_EQ_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_I32 - - Inst_VOP3__V_CMPX_EQ_I32::~Inst_VOP3__V_CMPX_EQ_I32() - { - } // ~Inst_VOP3__V_CMPX_EQ_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_I32 class methods --- - - Inst_VOP3__V_CMPX_LE_I32::Inst_VOP3__V_CMPX_LE_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_I32 - - Inst_VOP3__V_CMPX_LE_I32::~Inst_VOP3__V_CMPX_LE_I32() - { - } // ~Inst_VOP3__V_CMPX_LE_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_I32 class methods --- - - Inst_VOP3__V_CMPX_GT_I32::Inst_VOP3__V_CMPX_GT_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_I32 - - Inst_VOP3__V_CMPX_GT_I32::~Inst_VOP3__V_CMPX_GT_I32() - { - } // ~Inst_VOP3__V_CMPX_GT_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NE_I32 class methods --- - - Inst_VOP3__V_CMPX_NE_I32::Inst_VOP3__V_CMPX_NE_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ne_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NE_I32 - - Inst_VOP3__V_CMPX_NE_I32::~Inst_VOP3__V_CMPX_NE_I32() - { - } // ~Inst_VOP3__V_CMPX_NE_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_I32 class methods --- - - Inst_VOP3__V_CMPX_GE_I32::Inst_VOP3__V_CMPX_GE_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_I32 - - Inst_VOP3__V_CMPX_GE_I32::~Inst_VOP3__V_CMPX_GE_I32() - { - } // ~Inst_VOP3__V_CMPX_GE_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_T_I32 class methods --- - - Inst_VOP3__V_CMPX_T_I32::Inst_VOP3__V_CMPX_T_I32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_t_i32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_T_I32 - - Inst_VOP3__V_CMPX_T_I32::~Inst_VOP3__V_CMPX_T_I32() - { - } // ~Inst_VOP3__V_CMPX_T_I32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_U32 class methods --- - - Inst_VOP3__V_CMPX_F_U32::Inst_VOP3__V_CMPX_F_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_U32 - - Inst_VOP3__V_CMPX_F_U32::~Inst_VOP3__V_CMPX_F_U32() - { - } // ~Inst_VOP3__V_CMPX_F_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_U32 class methods --- - - Inst_VOP3__V_CMPX_LT_U32::Inst_VOP3__V_CMPX_LT_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_U32 - - Inst_VOP3__V_CMPX_LT_U32::~Inst_VOP3__V_CMPX_LT_U32() - { - } // ~Inst_VOP3__V_CMPX_LT_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_U32 class methods --- - - Inst_VOP3__V_CMPX_EQ_U32::Inst_VOP3__V_CMPX_EQ_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_U32 - - Inst_VOP3__V_CMPX_EQ_U32::~Inst_VOP3__V_CMPX_EQ_U32() - { - } // ~Inst_VOP3__V_CMPX_EQ_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_U32 class methods --- - - Inst_VOP3__V_CMPX_LE_U32::Inst_VOP3__V_CMPX_LE_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_U32 - - Inst_VOP3__V_CMPX_LE_U32::~Inst_VOP3__V_CMPX_LE_U32() - { - } // ~Inst_VOP3__V_CMPX_LE_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_U32 class methods --- - - Inst_VOP3__V_CMPX_GT_U32::Inst_VOP3__V_CMPX_GT_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_U32 - - Inst_VOP3__V_CMPX_GT_U32::~Inst_VOP3__V_CMPX_GT_U32() - { - } // ~Inst_VOP3__V_CMPX_GT_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NE_U32 class methods --- - - Inst_VOP3__V_CMPX_NE_U32::Inst_VOP3__V_CMPX_NE_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ne_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NE_U32 - - Inst_VOP3__V_CMPX_NE_U32::~Inst_VOP3__V_CMPX_NE_U32() - { - } // ~Inst_VOP3__V_CMPX_NE_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_U32 class methods --- - - Inst_VOP3__V_CMPX_GE_U32::Inst_VOP3__V_CMPX_GE_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_U32 - - Inst_VOP3__V_CMPX_GE_U32::~Inst_VOP3__V_CMPX_GE_U32() - { - } // ~Inst_VOP3__V_CMPX_GE_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_T_U32 class methods --- - - Inst_VOP3__V_CMPX_T_U32::Inst_VOP3__V_CMPX_T_U32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_t_u32", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_T_U32 - - Inst_VOP3__V_CMPX_T_U32::~Inst_VOP3__V_CMPX_T_U32() - { - } // ~Inst_VOP3__V_CMPX_T_U32 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_I64 class methods --- - - Inst_VOP3__V_CMP_F_I64::Inst_VOP3__V_CMP_F_I64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_I64 - - Inst_VOP3__V_CMP_F_I64::~Inst_VOP3__V_CMP_F_I64() - { - } // ~Inst_VOP3__V_CMP_F_I64 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_I64 class methods --- - - Inst_VOP3__V_CMP_LT_I64::Inst_VOP3__V_CMP_LT_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_I64 - - Inst_VOP3__V_CMP_LT_I64::~Inst_VOP3__V_CMP_LT_I64() - { - } // ~Inst_VOP3__V_CMP_LT_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_I64 class methods --- - - Inst_VOP3__V_CMP_EQ_I64::Inst_VOP3__V_CMP_EQ_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_I64 - - Inst_VOP3__V_CMP_EQ_I64::~Inst_VOP3__V_CMP_EQ_I64() - { - } // ~Inst_VOP3__V_CMP_EQ_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_I64 class methods --- - - Inst_VOP3__V_CMP_LE_I64::Inst_VOP3__V_CMP_LE_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_I64 - - Inst_VOP3__V_CMP_LE_I64::~Inst_VOP3__V_CMP_LE_I64() - { - } // ~Inst_VOP3__V_CMP_LE_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_I64 class methods --- - - Inst_VOP3__V_CMP_GT_I64::Inst_VOP3__V_CMP_GT_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_I64 - - Inst_VOP3__V_CMP_GT_I64::~Inst_VOP3__V_CMP_GT_I64() - { - } // ~Inst_VOP3__V_CMP_GT_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NE_I64 class methods --- - - Inst_VOP3__V_CMP_NE_I64::Inst_VOP3__V_CMP_NE_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ne_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_I64 - - Inst_VOP3__V_CMP_NE_I64::~Inst_VOP3__V_CMP_NE_I64() - { - } // ~Inst_VOP3__V_CMP_NE_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_I64 class methods --- - - Inst_VOP3__V_CMP_GE_I64::Inst_VOP3__V_CMP_GE_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_I64 - - Inst_VOP3__V_CMP_GE_I64::~Inst_VOP3__V_CMP_GE_I64() - { - } // ~Inst_VOP3__V_CMP_GE_I64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_T_I64 class methods --- - - Inst_VOP3__V_CMP_T_I64::Inst_VOP3__V_CMP_T_I64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_t_i64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_I64 - - Inst_VOP3__V_CMP_T_I64::~Inst_VOP3__V_CMP_T_I64() - { - } // ~Inst_VOP3__V_CMP_T_I64 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_F_U64 class methods --- - - Inst_VOP3__V_CMP_F_U64::Inst_VOP3__V_CMP_F_U64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_f_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_F_U64 - - Inst_VOP3__V_CMP_F_U64::~Inst_VOP3__V_CMP_F_U64() - { - } // ~Inst_VOP3__V_CMP_F_U64 - - // --- description from .arch file --- - // D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LT_U64 class methods --- - - Inst_VOP3__V_CMP_LT_U64::Inst_VOP3__V_CMP_LT_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_lt_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LT_U64 - - Inst_VOP3__V_CMP_LT_U64::~Inst_VOP3__V_CMP_LT_U64() - { - } // ~Inst_VOP3__V_CMP_LT_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_EQ_U64 class methods --- - - Inst_VOP3__V_CMP_EQ_U64::Inst_VOP3__V_CMP_EQ_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_eq_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_EQ_U64 - - Inst_VOP3__V_CMP_EQ_U64::~Inst_VOP3__V_CMP_EQ_U64() - { - } // ~Inst_VOP3__V_CMP_EQ_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_LE_U64 class methods --- - - Inst_VOP3__V_CMP_LE_U64::Inst_VOP3__V_CMP_LE_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_le_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_LE_U64 - - Inst_VOP3__V_CMP_LE_U64::~Inst_VOP3__V_CMP_LE_U64() - { - } // ~Inst_VOP3__V_CMP_LE_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GT_U64 class methods --- - - Inst_VOP3__V_CMP_GT_U64::Inst_VOP3__V_CMP_GT_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_gt_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GT_U64 - - Inst_VOP3__V_CMP_GT_U64::~Inst_VOP3__V_CMP_GT_U64() - { - } // ~Inst_VOP3__V_CMP_GT_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_NE_U64 class methods --- - - Inst_VOP3__V_CMP_NE_U64::Inst_VOP3__V_CMP_NE_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ne_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_NE_U64 - - Inst_VOP3__V_CMP_NE_U64::~Inst_VOP3__V_CMP_NE_U64() - { - } // ~Inst_VOP3__V_CMP_NE_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_GE_U64 class methods --- - - Inst_VOP3__V_CMP_GE_U64::Inst_VOP3__V_CMP_GE_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_ge_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_GE_U64 - - Inst_VOP3__V_CMP_GE_U64::~Inst_VOP3__V_CMP_GE_U64() - { - } // ~Inst_VOP3__V_CMP_GE_U64 - - // --- description from .arch file --- - // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMP_T_U64 class methods --- - - Inst_VOP3__V_CMP_T_U64::Inst_VOP3__V_CMP_T_U64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmp_t_u64", true) - { - setFlag(ALU); - } // Inst_VOP3__V_CMP_T_U64 - - Inst_VOP3__V_CMP_T_U64::~Inst_VOP3__V_CMP_T_U64() - { - } // ~Inst_VOP3__V_CMP_T_U64 - - // --- description from .arch file --- - // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_I64 class methods --- - - Inst_VOP3__V_CMPX_F_I64::Inst_VOP3__V_CMPX_F_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_I64 - - Inst_VOP3__V_CMPX_F_I64::~Inst_VOP3__V_CMPX_F_I64() - { - } // ~Inst_VOP3__V_CMPX_F_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_I64 class methods --- - - Inst_VOP3__V_CMPX_LT_I64::Inst_VOP3__V_CMPX_LT_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_I64 - - Inst_VOP3__V_CMPX_LT_I64::~Inst_VOP3__V_CMPX_LT_I64() - { - } // ~Inst_VOP3__V_CMPX_LT_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_I64 class methods --- - - Inst_VOP3__V_CMPX_EQ_I64::Inst_VOP3__V_CMPX_EQ_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_I64 - - Inst_VOP3__V_CMPX_EQ_I64::~Inst_VOP3__V_CMPX_EQ_I64() - { - } // ~Inst_VOP3__V_CMPX_EQ_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_I64 class methods --- - - Inst_VOP3__V_CMPX_LE_I64::Inst_VOP3__V_CMPX_LE_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_I64 - - Inst_VOP3__V_CMPX_LE_I64::~Inst_VOP3__V_CMPX_LE_I64() - { - } // ~Inst_VOP3__V_CMPX_LE_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_I64 class methods --- - - Inst_VOP3__V_CMPX_GT_I64::Inst_VOP3__V_CMPX_GT_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_I64 - - Inst_VOP3__V_CMPX_GT_I64::~Inst_VOP3__V_CMPX_GT_I64() - { - } // ~Inst_VOP3__V_CMPX_GT_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NE_I64 class methods --- - - Inst_VOP3__V_CMPX_NE_I64::Inst_VOP3__V_CMPX_NE_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ne_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NE_I64 - - Inst_VOP3__V_CMPX_NE_I64::~Inst_VOP3__V_CMPX_NE_I64() - { - } // ~Inst_VOP3__V_CMPX_NE_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_I64 class methods --- - - Inst_VOP3__V_CMPX_GE_I64::Inst_VOP3__V_CMPX_GE_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_I64 - - Inst_VOP3__V_CMPX_GE_I64::~Inst_VOP3__V_CMPX_GE_I64() - { - } // ~Inst_VOP3__V_CMPX_GE_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_T_I64 class methods --- - - Inst_VOP3__V_CMPX_T_I64::Inst_VOP3__V_CMPX_T_I64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_t_i64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_T_I64 - - Inst_VOP3__V_CMPX_T_I64::~Inst_VOP3__V_CMPX_T_I64() - { - } // ~Inst_VOP3__V_CMPX_T_I64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_F_U64 class methods --- - - Inst_VOP3__V_CMPX_F_U64::Inst_VOP3__V_CMPX_F_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_f_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_F_U64 - - Inst_VOP3__V_CMPX_F_U64::~Inst_VOP3__V_CMPX_F_U64() - { - } // ~Inst_VOP3__V_CMPX_F_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LT_U64 class methods --- - - Inst_VOP3__V_CMPX_LT_U64::Inst_VOP3__V_CMPX_LT_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_lt_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LT_U64 - - Inst_VOP3__V_CMPX_LT_U64::~Inst_VOP3__V_CMPX_LT_U64() - { - } // ~Inst_VOP3__V_CMPX_LT_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_EQ_U64 class methods --- - - Inst_VOP3__V_CMPX_EQ_U64::Inst_VOP3__V_CMPX_EQ_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_eq_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_EQ_U64 - - Inst_VOP3__V_CMPX_EQ_U64::~Inst_VOP3__V_CMPX_EQ_U64() - { - } // ~Inst_VOP3__V_CMPX_EQ_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_LE_U64 class methods --- - - Inst_VOP3__V_CMPX_LE_U64::Inst_VOP3__V_CMPX_LE_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_le_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_LE_U64 - - Inst_VOP3__V_CMPX_LE_U64::~Inst_VOP3__V_CMPX_LE_U64() - { - } // ~Inst_VOP3__V_CMPX_LE_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GT_U64 class methods --- - - Inst_VOP3__V_CMPX_GT_U64::Inst_VOP3__V_CMPX_GT_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_gt_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GT_U64 - - Inst_VOP3__V_CMPX_GT_U64::~Inst_VOP3__V_CMPX_GT_U64() - { - } // ~Inst_VOP3__V_CMPX_GT_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_NE_U64 class methods --- - - Inst_VOP3__V_CMPX_NE_U64::Inst_VOP3__V_CMPX_NE_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ne_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_NE_U64 - - Inst_VOP3__V_CMPX_NE_U64::~Inst_VOP3__V_CMPX_NE_U64() - { - } // ~Inst_VOP3__V_CMPX_NE_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_GE_U64 class methods --- - - Inst_VOP3__V_CMPX_GE_U64::Inst_VOP3__V_CMPX_GE_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_ge_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_GE_U64 - - Inst_VOP3__V_CMPX_GE_U64::~Inst_VOP3__V_CMPX_GE_U64() - { - } // ~Inst_VOP3__V_CMPX_GE_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. - void - Inst_VOP3__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CMPX_T_U64 class methods --- - - Inst_VOP3__V_CMPX_T_U64::Inst_VOP3__V_CMPX_T_U64( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cmpx_t_u64", true) - { - setFlag(ALU); - setFlag(WritesEXEC); - } // Inst_VOP3__V_CMPX_T_U64 - - Inst_VOP3__V_CMPX_T_U64::~Inst_VOP3__V_CMPX_T_U64() - { - } // ~Inst_VOP3__V_CMPX_T_U64 - - // --- description from .arch file --- - // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
- void - Inst_VOP3__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ScalarOperandU64 sdst(gpuDynInst, instData.VDST); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - sdst.setBit(lane, 1); - } - } - - wf->execMask() = sdst.rawData(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_CNDMASK_B32 class methods --- - - Inst_VOP3__V_CNDMASK_B32::Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cndmask_b32", false) - { - setFlag(ALU); - setFlag(ReadsVCC); - } // Inst_VOP3__V_CNDMASK_B32 - - Inst_VOP3__V_CNDMASK_B32::~Inst_VOP3__V_CNDMASK_B32() - { - } // ~Inst_VOP3__V_CNDMASK_B32 - - // --- description from .arch file --- - // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC - // as a scalar GPR in S2. - void - Inst_VOP3__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - vcc.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = bits(vcc.rawData(), lane) - ? 
src1[lane] : src0[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ADD_F32 class methods --- - - Inst_VOP3__V_ADD_F32::Inst_VOP3__V_ADD_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_add_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_ADD_F32 - - Inst_VOP3__V_ADD_F32::~Inst_VOP3__V_ADD_F32() - { - } // ~Inst_VOP3__V_ADD_F32 - - // --- description from .arch file --- - // D.f = S0.f + S1.f. - void - Inst_VOP3__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SUB_F32 class methods --- - - Inst_VOP3__V_SUB_F32::Inst_VOP3__V_SUB_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sub_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_SUB_F32 - - Inst_VOP3__V_SUB_F32::~Inst_VOP3__V_SUB_F32() - { - } // ~Inst_VOP3__V_SUB_F32 - - // --- description from .arch file --- - // D.f = S0.f - S1.f. - // SQ translates to V_ADD_F32. 
- void - Inst_VOP3__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SUBREV_F32 class methods --- - - Inst_VOP3__V_SUBREV_F32::Inst_VOP3__V_SUBREV_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_subrev_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_SUBREV_F32 - - Inst_VOP3__V_SUBREV_F32::~Inst_VOP3__V_SUBREV_F32() - { - } // ~Inst_VOP3__V_SUBREV_F32 - - // --- description from .arch file --- - // D.f = S1.f - S0.f. - // SQ translates to V_ADD_F32. 
- void - Inst_VOP3__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_LEGACY_F32 class methods --- - - Inst_VOP3__V_MUL_LEGACY_F32::Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_legacy_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MUL_LEGACY_F32 - - Inst_VOP3__V_MUL_LEGACY_F32::~Inst_VOP3__V_MUL_LEGACY_F32() - { - } // ~Inst_VOP3__V_MUL_LEGACY_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f (DX9 rules, 0.0*x = 0.0). 
- void - Inst_VOP3__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isnan(src0[lane]) || - std::isnan(src1[lane])) { - vdst[lane] = NAN; - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - !std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if (std::isinf(src0[lane]) && - !std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - } - } else if (std::isinf(src0[lane]) && - std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - 
} - } else { - vdst[lane] = src0[lane] * src1[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_F32 class methods --- - - Inst_VOP3__V_MUL_F32::Inst_VOP3__V_MUL_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MUL_F32 - - Inst_VOP3__V_MUL_F32::~Inst_VOP3__V_MUL_F32() - { - } // ~Inst_VOP3__V_MUL_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f. - void - Inst_VOP3__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isnan(src0[lane]) || - std::isnan(src1[lane])) { - vdst[lane] = NAN; - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - !std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || - std::fpclassify(src0[lane]) == FP_ZERO) && - std::signbit(src0[lane])) { - if (std::isinf(src1[lane])) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +0.0; - } else { - vdst[lane] = -0.0; - } - } else if (std::isinf(src0[lane]) && - !std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - 
std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (!std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - } - } else if (std::isinf(src0[lane]) && - std::signbit(src0[lane])) { - if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || - std::fpclassify(src1[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (std::signbit(src1[lane])) { - vdst[lane] = +INFINITY; - } else { - vdst[lane] = -INFINITY; - } - } else { - vdst[lane] = src0[lane] * src1[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_I32_I24 class methods --- - - Inst_VOP3__V_MUL_I32_I24::Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_i32_i24", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_I32_I24 - - Inst_VOP3__V_MUL_I32_I24::~Inst_VOP3__V_MUL_I32_I24() - { - } // ~Inst_VOP3__V_MUL_I32_I24 - - // --- description from .arch file --- - // D.i = S0.i[23:0] * S1.i[23:0]. - void - Inst_VOP3__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = sext<24>(bits(src0[lane], 23, 0)) - * sext<24>(bits(src1[lane], 23, 0)); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_HI_I32_I24 class methods --- - - Inst_VOP3__V_MUL_HI_I32_I24::Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_hi_i32_i24", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_HI_I32_I24 - - 
Inst_VOP3__V_MUL_HI_I32_I24::~Inst_VOP3__V_MUL_HI_I32_I24() - { - } // ~Inst_VOP3__V_MUL_HI_I32_I24 - - // --- description from .arch file --- - // D.i = (S0.i[23:0] * S1.i[23:0])>>32. - void - Inst_VOP3__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI64 tmp_src0 - = (VecElemI64)sext<24>(bits(src0[lane], 23, 0)); - VecElemI64 tmp_src1 - = (VecElemI64)sext<24>(bits(src1[lane], 23, 0)); - - vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_U32_U24 class methods --- - - Inst_VOP3__V_MUL_U32_U24::Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_u32_u24", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_U32_U24 - - Inst_VOP3__V_MUL_U32_U24::~Inst_VOP3__V_MUL_U32_U24() - { - } // ~Inst_VOP3__V_MUL_U32_U24 - - // --- description from .arch file --- - // D.u = S0.u[23:0] * S1.u[23:0]. 
- void - Inst_VOP3__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_HI_U32_U24 class methods --- - - Inst_VOP3__V_MUL_HI_U32_U24::Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_hi_u32_u24", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_HI_U32_U24 - - Inst_VOP3__V_MUL_HI_U32_U24::~Inst_VOP3__V_MUL_HI_U32_U24() - { - } // ~Inst_VOP3__V_MUL_HI_U32_U24 - - // --- description from .arch file --- - // D.i = (S0.u[23:0] * S1.u[23:0])>>32. 
- void - Inst_VOP3__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); - VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0); - vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN_F32 class methods --- - - Inst_VOP3__V_MIN_F32::Inst_VOP3__V_MIN_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MIN_F32 - - Inst_VOP3__V_MIN_F32::~Inst_VOP3__V_MIN_F32() - { - } // ~Inst_VOP3__V_MIN_F32 - - // --- description from .arch file --- - // D.f = (S0.f < S1.f ? S0.f : S1.f). 
- void - Inst_VOP3__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fmin(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX_F32 class methods --- - - Inst_VOP3__V_MAX_F32::Inst_VOP3__V_MAX_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MAX_F32 - - Inst_VOP3__V_MAX_F32::~Inst_VOP3__V_MAX_F32() - { - } // ~Inst_VOP3__V_MAX_F32 - - // --- description from .arch file --- - // D.f = (S0.f >= S1.f ? S0.f : S1.f). 
- void - Inst_VOP3__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fmax(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN_I32 class methods --- - - Inst_VOP3__V_MIN_I32::Inst_VOP3__V_MIN_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MIN_I32 - - Inst_VOP3__V_MIN_I32::~Inst_VOP3__V_MIN_I32() - { - } // ~Inst_VOP3__V_MIN_I32 - - // --- description from .arch file --- - // D.i = min(S0.i, S1.i). 
- void - Inst_VOP3__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX_I32 class methods --- - - Inst_VOP3__V_MAX_I32::Inst_VOP3__V_MAX_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX_I32 - - Inst_VOP3__V_MAX_I32::~Inst_VOP3__V_MAX_I32() - { - } // ~Inst_VOP3__V_MAX_I32 - - // --- description from .arch file --- - // D.i = max(S0.i, S1.i). 
- void - Inst_VOP3__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN_U32 class methods --- - - Inst_VOP3__V_MIN_U32::Inst_VOP3__V_MIN_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MIN_U32 - - Inst_VOP3__V_MIN_U32::~Inst_VOP3__V_MIN_U32() - { - } // ~Inst_VOP3__V_MIN_U32 - - // --- description from .arch file --- - // D.u = min(S0.u, S1.u). 
- void - Inst_VOP3__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX_U32 class methods --- - - Inst_VOP3__V_MAX_U32::Inst_VOP3__V_MAX_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX_U32 - - Inst_VOP3__V_MAX_U32::~Inst_VOP3__V_MAX_U32() - { - } // ~Inst_VOP3__V_MAX_U32 - - // --- description from .arch file --- - // D.u = max(S0.u, S1.u). 
- void - Inst_VOP3__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LSHRREV_B32 class methods --- - - Inst_VOP3__V_LSHRREV_B32::Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_lshrrev_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHRREV_B32 - - Inst_VOP3__V_LSHRREV_B32::~Inst_VOP3__V_LSHRREV_B32() - { - } // ~Inst_VOP3__V_LSHRREV_B32 - - // --- description from .arch file --- - // D.u = S1.u >> S0.u[4:0]. - // The vacated bits are set to zero. - // SQ translates this to an internal SP opcode. 
- void - Inst_VOP3__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ASHRREV_I32 class methods --- - - Inst_VOP3__V_ASHRREV_I32::Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ashrrev_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ASHRREV_I32 - - Inst_VOP3__V_ASHRREV_I32::~Inst_VOP3__V_ASHRREV_I32() - { - } // ~Inst_VOP3__V_ASHRREV_I32 - - // --- description from .arch file --- - // D.i = signext(S1.i) >> S0.i[4:0]. - // The vacated bits are set to the sign bit of the input value. - // SQ translates this to an internal SP opcode. 
- void - Inst_VOP3__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LSHLREV_B32 class methods --- - - Inst_VOP3__V_LSHLREV_B32::Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_lshlrev_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHLREV_B32 - - Inst_VOP3__V_LSHLREV_B32::~Inst_VOP3__V_LSHLREV_B32() - { - } // ~Inst_VOP3__V_LSHLREV_B32 - - // --- description from .arch file --- - // D.u = S1.u << S0.u[4:0]. - // SQ translates this to an internal SP opcode. 
- void - Inst_VOP3__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] << bits(src0[lane], 4, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_AND_B32 class methods --- - - Inst_VOP3__V_AND_B32::Inst_VOP3__V_AND_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_and_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_AND_B32 - - Inst_VOP3__V_AND_B32::~Inst_VOP3__V_AND_B32() - { - } // ~Inst_VOP3__V_AND_B32 - - // --- description from .arch file --- - // D.u = S0.u & S1.u. - // Input and output modifiers not supported. 
- void - Inst_VOP3__V_AND_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] & src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_OR_B32 class methods --- - - Inst_VOP3__V_OR_B32::Inst_VOP3__V_OR_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_or_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_OR_B32 - - Inst_VOP3__V_OR_B32::~Inst_VOP3__V_OR_B32() - { - } // ~Inst_VOP3__V_OR_B32 - - // --- description from .arch file --- - // D.u = S0.u | S1.u. - // Input and output modifiers not supported. 
- void - Inst_VOP3__V_OR_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] | src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_OR3_B32 class methods --- - - Inst_VOP3__V_OR3_B32::Inst_VOP3__V_OR3_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_or3_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_OR3_B32 - - Inst_VOP3__V_OR3_B32::~Inst_VOP3__V_OR3_B32() - { - } // ~Inst_VOP3__V_OR3_B32 - - // --- description from .arch file --- - // D.u = S0.u | S1.u | S2.u. - // Input and output modifiers not supported. 
- void - Inst_VOP3__V_OR3_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] | src1[lane] | src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_XOR_B32 class methods --- - - Inst_VOP3__V_XOR_B32::Inst_VOP3__V_XOR_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_xor_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_XOR_B32 - - Inst_VOP3__V_XOR_B32::~Inst_VOP3__V_XOR_B32() - { - } // ~Inst_VOP3__V_XOR_B32 - - // --- description from .arch file --- - // D.u = S0.u ^ S1.u. - // Input and output modifiers not supported. 
- void - Inst_VOP3__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] ^ src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAC_F32 class methods --- - - Inst_VOP3__V_MAC_F32::Inst_VOP3__V_MAC_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mac_f32", false) - { - setFlag(ALU); - setFlag(F32); - setFlag(MAC); - } // Inst_VOP3__V_MAC_F32 - - Inst_VOP3__V_MAC_F32::~Inst_VOP3__V_MAC_F32() - { - } // ~Inst_VOP3__V_MAC_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f + D.f. - // SQ translates to V_MAD_F32. 
- void - Inst_VOP3__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - vdst.read(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ADD_CO_U32 class methods --- - - Inst_VOP3__V_ADD_CO_U32::Inst_VOP3__V_ADD_CO_U32(InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_add_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - } // Inst_VOP3__V_ADD_CO_U32 - - Inst_VOP3__V_ADD_CO_U32::~Inst_VOP3__V_ADD_CO_U32() - { - } // ~Inst_VOP3__V_ADD_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u; - // VCC[threadId] = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an UNSIGNED - // --- overflow or carry-out for V_ADDC_U32. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair. 
- void - Inst_VOP3__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, instData.SDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - vcc.setBit(lane, ((VecElemU64)src0[lane] - + (VecElemU64)src1[lane]) >= 0x100000000ULL ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP3__V_SUB_CO_U32 class methods --- - - Inst_VOP3__V_SUB_CO_U32::Inst_VOP3__V_SUB_CO_U32(InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_sub_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - } // Inst_VOP3__V_SUB_CO_U32 - - Inst_VOP3__V_SUB_CO_U32::~Inst_VOP3__V_SUB_CO_U32() - { - } // ~Inst_VOP3__V_SUB_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u - S1.u; - // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or - // carry-out for V_SUBB_U32. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair. 
- void - Inst_VOP3__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, instData.SDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane]; - vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP3__V_SUBREV_CO_U32 class methods --- - - Inst_VOP3__V_SUBREV_CO_U32::Inst_VOP3__V_SUBREV_CO_U32( - InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_subrev_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - } // Inst_VOP3__V_SUBREV_CO_U32 - - Inst_VOP3__V_SUBREV_CO_U32::~Inst_VOP3__V_SUBREV_CO_U32() - { - } // ~Inst_VOP3__V_SUBREV_CO_U32 - - // --- description from .arch file --- - // D.u = S1.u - S0.u; - // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or - // carry-out for V_SUBB_U32. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair. - // SQ translates this to V_SUB_U32 with reversed operands. 
- void - Inst_VOP3__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 vcc(gpuDynInst, instData.SDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane]; - vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); - } - } - - vdst.write(); - vcc.write(); - } // execute - // --- Inst_VOP3__V_ADDC_CO_U32 class methods --- - - Inst_VOP3__V_ADDC_CO_U32::Inst_VOP3__V_ADDC_CO_U32(InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_addc_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(ReadsVCC); - } // Inst_VOP3__V_ADDC_CO_U32 - - Inst_VOP3__V_ADDC_CO_U32::~Inst_VOP3__V_ADDC_CO_U32() - { - } // ~Inst_VOP3__V_ADDC_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u + VCC[threadId]; - // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x800000000ULL ? 1 : 0) - // is an UNSIGNED overflow. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC - // source comes from the SGPR-pair at S2.u. 
- void - Inst_VOP3__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - - src0.readSrc(); - src1.readSrc(); - vcc.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane] - + bits(vcc.rawData(), lane); - sdst.setBit(lane, ((VecElemU64)src0[lane] - + (VecElemU64)src1[lane] - + (VecElemU64)bits(vcc.rawData(), lane)) - >= 0x100000000 ? 1 : 0); - } - } - - vdst.write(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_SUBB_CO_U32 class methods --- - - Inst_VOP3__V_SUBB_CO_U32::Inst_VOP3__V_SUBB_CO_U32(InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_subb_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(ReadsVCC); - } // Inst_VOP3__V_SUBB_CO_U32 - - Inst_VOP3__V_SUBB_CO_U32::~Inst_VOP3__V_SUBB_CO_U32() - { - } // ~Inst_VOP3__V_SUBB_CO_U32 - - // --- description from .arch file --- - // D.u = S0.u - S1.u - VCC[threadId]; - // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED - // --- overflow. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC - // --- source comes from the SGPR-pair at S2.u. 
- void - Inst_VOP3__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2); - ScalarOperandU64 sdst(gpuDynInst, instData.SDST); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - vcc.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] - src1[lane] - - bits(vcc.rawData(), lane); - sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) - > src0[lane] ? 1 : 0); - } - } - - vdst.write(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_SUBBREV_CO_U32 class methods --- - - Inst_VOP3__V_SUBBREV_CO_U32::Inst_VOP3__V_SUBBREV_CO_U32( - InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_subbrev_co_u32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(ReadsVCC); - } // Inst_VOP3__V_SUBBREV_CO_U32 - - Inst_VOP3__V_SUBBREV_CO_U32::~Inst_VOP3__V_SUBBREV_CO_U32() - { - } // ~Inst_VOP3__V_SUBBREV_CO_U32 - - // --- description from .arch file --- - // D.u = S1.u - S0.u - VCC[threadId]; - // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED - // overflow. - // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC - // source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32. 
- void - Inst_VOP3__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstScalarOperandU64 sdst(gpuDynInst, instData.SDST); - ScalarOperandU64 vcc(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - vcc.read(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] - src0[lane] - - bits(vcc.rawData(), lane); - sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) - > src0[lane] ? 1 : 0); - } - } - - vdst.write(); - sdst.write(); - } // execute - // --- Inst_VOP3__V_ADD_F16 class methods --- - - Inst_VOP3__V_ADD_F16::Inst_VOP3__V_ADD_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_add_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_ADD_F16 - - Inst_VOP3__V_ADD_F16::~Inst_VOP3__V_ADD_F16() - { - } // ~Inst_VOP3__V_ADD_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 + S1.f16. - // Supports denormals, round mode, exception flags, saturation. - void - Inst_VOP3__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_SUB_F16 class methods --- - - Inst_VOP3__V_SUB_F16::Inst_VOP3__V_SUB_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sub_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_SUB_F16 - - Inst_VOP3__V_SUB_F16::~Inst_VOP3__V_SUB_F16() - { - } // ~Inst_VOP3__V_SUB_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 - S1.f16. - // Supports denormals, round mode, exception flags, saturation. - // SQ translates to V_ADD_F16. 
- void - Inst_VOP3__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_SUBREV_F16 class methods --- - - Inst_VOP3__V_SUBREV_F16::Inst_VOP3__V_SUBREV_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_subrev_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_SUBREV_F16 - - Inst_VOP3__V_SUBREV_F16::~Inst_VOP3__V_SUBREV_F16() - { - } // ~Inst_VOP3__V_SUBREV_F16 - - // --- description from .arch file --- - // D.f16 = S1.f16 - S0.f16. - // Supports denormals, round mode, exception flags, saturation. - // SQ translates to V_ADD_F16. - void - Inst_VOP3__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_MUL_F16 class methods --- - - Inst_VOP3__V_MUL_F16::Inst_VOP3__V_MUL_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_MUL_F16 - - Inst_VOP3__V_MUL_F16::~Inst_VOP3__V_MUL_F16() - { - } // ~Inst_VOP3__V_MUL_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * S1.f16. - // Supports denormals, round mode, exception flags, saturation. - void - Inst_VOP3__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_MAC_F16 class methods --- - - Inst_VOP3__V_MAC_F16::Inst_VOP3__V_MAC_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mac_f16", false) - { - setFlag(ALU); - setFlag(F16); - setFlag(MAC); - } // Inst_VOP3__V_MAC_F16 - - Inst_VOP3__V_MAC_F16::~Inst_VOP3__V_MAC_F16() - { - } // ~Inst_VOP3__V_MAC_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * S1.f16 + D.f16. - // Supports round mode, exception flags, saturation. - // SQ translates this to V_MAD_F16. 
- void - Inst_VOP3__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_ADD_U16 class methods --- - - Inst_VOP3__V_ADD_U16::Inst_VOP3__V_ADD_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_add_u16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ADD_U16 - - Inst_VOP3__V_ADD_U16::~Inst_VOP3__V_ADD_U16() - { - } // ~Inst_VOP3__V_ADD_U16 - - // --- description from .arch file --- - // D.u16 = S0.u16 + S1.u16. - // Supports saturation (unsigned 16-bit integer domain). - void - Inst_VOP3__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SUB_U16 class methods --- - - Inst_VOP3__V_SUB_U16::Inst_VOP3__V_SUB_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sub_u16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_SUB_U16 - - Inst_VOP3__V_SUB_U16::~Inst_VOP3__V_SUB_U16() - { - } // ~Inst_VOP3__V_SUB_U16 - - // --- description from .arch file --- - // D.u16 = S0.u16 - S1.u16. - // Supports saturation (unsigned 16-bit integer domain). 
    // Lane-wise 16-bit unsigned subtract (S0 - S1) for all lanes enabled
    // in the EXEC mask.
    void
    Inst_VOP3__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_SUBREV_U16 class methods ---

    Inst_VOP3__V_SUBREV_U16::Inst_VOP3__V_SUBREV_U16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_subrev_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SUBREV_U16

    Inst_VOP3__V_SUBREV_U16::~Inst_VOP3__V_SUBREV_U16()
    {
    } // ~Inst_VOP3__V_SUBREV_U16

    // --- description from .arch file ---
    // D.u16 = S1.u16 - S0.u16.
    // Supports saturation (unsigned 16-bit integer domain).
    // SQ translates this to V_SUB_U16 with reversed operands.
    // Lane-wise reversed 16-bit unsigned subtract (S1 - S0) for all lanes
    // enabled in the EXEC mask.
    void
    Inst_VOP3__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // operand order reversed relative to V_SUB_U16
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MUL_LO_U16 class methods ---

    Inst_VOP3__V_MUL_LO_U16::Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_mul_lo_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_LO_U16

    Inst_VOP3__V_MUL_LO_U16::~Inst_VOP3__V_MUL_LO_U16()
    {
    } // ~Inst_VOP3__V_MUL_LO_U16

    // --- description from .arch file ---
    // D.u16 = S0.u16 * S1.u16.
    // Supports saturation (unsigned 16-bit integer domain).
    // Lane-wise 16-bit unsigned multiply (low 16 bits of the product kept
    // by the u16 destination) for all lanes enabled in the EXEC mask.
    void
    Inst_VOP3__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_LSHLREV_B16 class methods ---

    Inst_VOP3__V_LSHLREV_B16::Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_lshlrev_b16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHLREV_B16

    Inst_VOP3__V_LSHLREV_B16::~Inst_VOP3__V_LSHLREV_B16()
    {
    } // ~Inst_VOP3__V_LSHLREV_B16

    // --- description from .arch file ---
    // D.u[15:0] = S1.u[15:0] << S0.u[3:0].
    // SQ translates this to an internal SP opcode.
    // Lane-wise reversed 16-bit left shift: the value comes from SRC1 and
    // the shift amount from the low 4 bits of SRC0.
    void
    Inst_VOP3__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // only bits [3:0] of SRC0 give the shift amount
                vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_LSHRREV_B16 class methods ---

    Inst_VOP3__V_LSHRREV_B16::Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_lshrrev_b16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHRREV_B16

    Inst_VOP3__V_LSHRREV_B16::~Inst_VOP3__V_LSHRREV_B16()
    {
    } // ~Inst_VOP3__V_LSHRREV_B16

    // --- description from .arch file ---
    // D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
    // The vacated bits are set to zero.
    // SQ translates this to an internal SP opcode.
- void - Inst_VOP3__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ASHRREV_I16 class methods --- - - Inst_VOP3__V_ASHRREV_I16::Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ashrrev_i16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ASHRREV_I16 - - Inst_VOP3__V_ASHRREV_I16::~Inst_VOP3__V_ASHRREV_I16() - { - } // ~Inst_VOP3__V_ASHRREV_I16 - - // --- description from .arch file --- - // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0]. - // The vacated bits are set to the sign bit of the input value. - // SQ translates this to an internal SP opcode. 
    // Lane-wise reversed 16-bit arithmetic right shift: the signed value
    // comes from SRC1 (ConstVecOperandI16, so >> sign-extends) and the
    // shift amount from the low 4 bits of SRC0.
    void
    Inst_VOP3__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MAX_F16 class methods ---

    Inst_VOP3__V_MAX_F16::Inst_VOP3__V_MAX_F16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_max_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_MAX_F16

    Inst_VOP3__V_MAX_F16::~Inst_VOP3__V_MAX_F16()
    {
    } // ~Inst_VOP3__V_MAX_F16

    // --- description from .arch file ---
    // D.f16 = max(S0.f16, S1.f16).
    // IEEE compliant. Supports denormals, round mode, exception flags,
    // saturation.
    // Unimplemented: executing this opcode panics the simulation.
    void
    Inst_VOP3__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_MIN_F16 class methods ---

    Inst_VOP3__V_MIN_F16::Inst_VOP3__V_MIN_F16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_min_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_MIN_F16

    Inst_VOP3__V_MIN_F16::~Inst_VOP3__V_MIN_F16()
    {
    } // ~Inst_VOP3__V_MIN_F16

    // --- description from .arch file ---
    // D.f16 = min(S0.f16, S1.f16).
    // IEEE compliant. Supports denormals, round mode, exception flags,
    // saturation.
- void - Inst_VOP3__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_MAX_U16 class methods --- - - Inst_VOP3__V_MAX_U16::Inst_VOP3__V_MAX_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max_u16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX_U16 - - Inst_VOP3__V_MAX_U16::~Inst_VOP3__V_MAX_U16() - { - } // ~Inst_VOP3__V_MAX_U16 - - // --- description from .arch file --- - // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]). - void - Inst_VOP3__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX_I16 class methods --- - - Inst_VOP3__V_MAX_I16::Inst_VOP3__V_MAX_I16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max_i16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX_I16 - - Inst_VOP3__V_MAX_I16::~Inst_VOP3__V_MAX_I16() - { - } // ~Inst_VOP3__V_MAX_I16 - - // --- description from .arch file --- - // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]). 
- void - Inst_VOP3__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::max(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN_U16 class methods --- - - Inst_VOP3__V_MIN_U16::Inst_VOP3__V_MIN_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min_u16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MIN_U16 - - Inst_VOP3__V_MIN_U16::~Inst_VOP3__V_MIN_U16() - { - } // ~Inst_VOP3__V_MIN_U16 - - // --- description from .arch file --- - // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]). 
- void - Inst_VOP3__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN_I16 class methods --- - - Inst_VOP3__V_MIN_I16::Inst_VOP3__V_MIN_I16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min_i16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MIN_I16 - - Inst_VOP3__V_MIN_I16::~Inst_VOP3__V_MIN_I16() - { - } // ~Inst_VOP3__V_MIN_I16 - - // --- description from .arch file --- - // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]). 
- void - Inst_VOP3__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::min(src0[lane], src1[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LDEXP_F16 class methods --- - - Inst_VOP3__V_LDEXP_F16::Inst_VOP3__V_LDEXP_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ldexp_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_LDEXP_F16 - - Inst_VOP3__V_LDEXP_F16::~Inst_VOP3__V_LDEXP_F16() - { - } // ~Inst_VOP3__V_LDEXP_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * (2 ** S1.i16). - void - Inst_VOP3__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_ADD_U32 class methods --- - - Inst_VOP3__V_ADD_U32::Inst_VOP3__V_ADD_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_add_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ADD_U32 - - Inst_VOP3__V_ADD_U32::~Inst_VOP3__V_ADD_U32() - { - } // ~Inst_VOP3__V_ADD_U32 - - // --- description from .arch file --- - // D.u32 = S0.u32 + S1.u32. 
    // Lane-wise 32-bit unsigned add (wrap-around, no carry out) for all
    // lanes enabled in the EXEC mask.
    void
    Inst_VOP3__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_SUB_U32 class methods ---

    Inst_VOP3__V_SUB_U32::Inst_VOP3__V_SUB_U32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_sub_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SUB_U32

    Inst_VOP3__V_SUB_U32::~Inst_VOP3__V_SUB_U32()
    {
    } // ~Inst_VOP3__V_SUB_U32

    // --- description from .arch file ---
    // D.u32 = S0.u32 - S1.u32.
    // Lane-wise 32-bit unsigned subtract (S0 - S1, wrap-around) for all
    // lanes enabled in the EXEC mask.
    void
    Inst_VOP3__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_SUBREV_U32 class methods ---

    Inst_VOP3__V_SUBREV_U32::Inst_VOP3__V_SUBREV_U32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_subrev_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SUBREV_U32

    Inst_VOP3__V_SUBREV_U32::~Inst_VOP3__V_SUBREV_U32()
    {
    } // ~Inst_VOP3__V_SUBREV_U32

    // --- description from .arch file ---
    // D.u32 = S1.u32 - S0.u32.
    // Lane-wise reversed 32-bit unsigned subtract (S1 - S0) for all lanes
    // enabled in the EXEC mask.
    void
    Inst_VOP3__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // operand order reversed relative to V_SUB_U32
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_NOP class methods ---

    Inst_VOP3__V_NOP::Inst_VOP3__V_NOP(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_nop", false)
    {
        setFlag(Nop);
        setFlag(ALU);
    } // Inst_VOP3__V_NOP

    Inst_VOP3__V_NOP::~Inst_VOP3__V_NOP()
    {
    } // ~Inst_VOP3__V_NOP

    // --- description from .arch file ---
    // Do nothing.
    // Intentionally empty: v_nop has no architectural effect.
    void
    Inst_VOP3__V_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
    } // execute
    // --- Inst_VOP3__V_MOV_B32 class methods ---

    Inst_VOP3__V_MOV_B32::Inst_VOP3__V_MOV_B32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_mov_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MOV_B32

    Inst_VOP3__V_MOV_B32::~Inst_VOP3__V_MOV_B32()
    {
    } // ~Inst_VOP3__V_MOV_B32

    // --- description from .arch file ---
    // D.u = S0.u.
    // Input and output modifiers not supported; this is an untyped operation.
- void - Inst_VOP3__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_I32_F64 class methods --- - - Inst_VOP3__V_CVT_I32_F64::Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_i32_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CVT_I32_F64 - - Inst_VOP3__V_CVT_I32_F64::~Inst_VOP3__V_CVT_I32_F64() - { - } // ~Inst_VOP3__V_CVT_I32_F64 - - // --- description from .arch file --- - // D.i = (int)S0.d. - // Out-of-range floating point values (including infinity) saturate. NaN is - // --- converted to 0. - void - Inst_VOP3__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp; - std::frexp(src[lane],&exp); - if (std::isnan(src[lane])) { - vdst[lane] = 0; - } else if (std::isinf(src[lane]) || exp > 30) { - if (std::signbit(src[lane])) { - vdst[lane] = INT_MIN; - } else { - vdst[lane] = INT_MAX; - } - } else { - vdst[lane] = (VecElemI32)src[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_F64_I32 class methods --- - - Inst_VOP3__V_CVT_F64_I32::Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f64_i32", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CVT_F64_I32 - - Inst_VOP3__V_CVT_F64_I32::~Inst_VOP3__V_CVT_F64_I32() - { - } // ~Inst_VOP3__V_CVT_F64_I32 - - // --- 
description from .arch file --- - // D.d = (double)S0.i. - void - Inst_VOP3__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF64)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_F32_I32 class methods --- - - Inst_VOP3__V_CVT_F32_I32::Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f32_i32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_F32_I32 - - Inst_VOP3__V_CVT_F32_I32::~Inst_VOP3__V_CVT_F32_I32() - { - } // ~Inst_VOP3__V_CVT_F32_I32 - - // --- description from .arch file --- - // D.f = (float)S0.i. - void - Inst_VOP3__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - VecOperandI32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_F32_U32 class methods --- - - Inst_VOP3__V_CVT_F32_U32::Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f32_u32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_F32_U32 - - Inst_VOP3__V_CVT_F32_U32::~Inst_VOP3__V_CVT_F32_U32() - { - } // ~Inst_VOP3__V_CVT_F32_U32 - - // --- description from 
.arch file --- - // D.f = (float)S0.u. - void - Inst_VOP3__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (VecElemF32)src[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_U32_F32 class methods --- - - Inst_VOP3__V_CVT_U32_F32::Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_u32_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_U32_F32 - - Inst_VOP3__V_CVT_U32_F32::~Inst_VOP3__V_CVT_U32_F32() - { - } // ~Inst_VOP3__V_CVT_U32_F32 - - // --- description from .arch file --- - // D.u = (unsigned)S0.f. - // Out-of-range floating point values (including infinity) saturate. NaN is - // --- converted to 0. 
    // Lane-wise float-to-unsigned conversion with saturation: NaN -> 0,
    // -inf -> 0, +inf or |src| >= 2^32 -> UINT_MAX, else C-style cast.
    void
    Inst_VOP3__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                // exp > 31 means |src| >= 2^32: out of u32 range
                std::frexp(src[lane],&exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 31) {
                    vdst[lane] = UINT_MAX;
                } else {
                    vdst[lane] = (VecElemU32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_CVT_I32_F32 class methods ---

    Inst_VOP3__V_CVT_I32_F32::Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_i32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_I32_F32

    Inst_VOP3__V_CVT_I32_F32::~Inst_VOP3__V_CVT_I32_F32()
    {
    } // ~Inst_VOP3__V_CVT_I32_F32

    // --- description from .arch file ---
    // D.i = (int)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN is
    // --- converted to 0.
    // Lane-wise float-to-signed conversion with saturation: NaN -> 0,
    // +/-inf or |src| >= 2^31 -> INT_MAX/INT_MIN, else C-style cast.
    void
    Inst_VOP3__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // ABS/NEG bit 0 applies to the single FP source operand
        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                // exp > 30 means |src| >= 2^31: out of i32 range
                std::frexp(src[lane],&exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 30) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = INT_MIN;
                    } else {
                        vdst[lane] = INT_MAX;
                    }
                } else {
                    vdst[lane] = (VecElemI32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MOV_FED_B32 class methods ---

    Inst_VOP3__V_MOV_FED_B32::Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_mov_fed_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MOV_FED_B32

    Inst_VOP3__V_MOV_FED_B32::~Inst_VOP3__V_MOV_FED_B32()
    {
    } // ~Inst_VOP3__V_MOV_FED_B32

    // --- description from .arch file ---
    // D.u = S0.u;
    // Introduce EDC double error upon write to dest vgpr without causing an
    // --- exception.
    // Input and output modifiers not supported; this is an untyped operation.
    // Unimplemented: executing this opcode panics the simulation.
    void
    Inst_VOP3__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CVT_F16_F32 class methods ---

    Inst_VOP3__V_CVT_F16_F32::Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_f16_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F16_F32

    Inst_VOP3__V_CVT_F16_F32::~Inst_VOP3__V_CVT_F16_F32()
    {
    } // ~Inst_VOP3__V_CVT_F16_F32

    // --- description from .arch file ---
    // D.f16 = flt32_to_flt16(S0.f).
    // Supports input modifiers and creates FP16 denormals when appropriate.
    // Unimplemented: executing this opcode panics the simulation.
    void
    Inst_VOP3__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CVT_F32_F16 class methods ---

    Inst_VOP3__V_CVT_F32_F16::Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_f32_f16", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_F16

    Inst_VOP3__V_CVT_F32_F16::~Inst_VOP3__V_CVT_F32_F16()
    {
    } // ~Inst_VOP3__V_CVT_F32_F16

    // --- description from .arch file ---
    // D.f = flt16_to_flt32(S0.f16).
    // FP16 denormal inputs are always accepted.
    // Unimplemented: executing this opcode panics the simulation.
    void
    Inst_VOP3__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CVT_RPI_I32_F32 class methods ---

    Inst_VOP3__V_CVT_RPI_I32_F32::Inst_VOP3__V_CVT_RPI_I32_F32(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_rpi_i32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_RPI_I32_F32

    Inst_VOP3__V_CVT_RPI_I32_F32::~Inst_VOP3__V_CVT_RPI_I32_F32()
    {
    } // ~Inst_VOP3__V_CVT_RPI_I32_F32

    // --- description from .arch file ---
    // D.i = (int)floor(S0.f + 0.5).
    // Lane-wise round-to-positive-infinity-style conversion:
    // floor(src + 0.5) truncated to i32.
    // NOTE(review): no range saturation here, unlike V_CVT_I32_F32 —
    // confirm intended.
    void
    Inst_VOP3__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_CVT_FLR_I32_F32 class methods ---

    Inst_VOP3__V_CVT_FLR_I32_F32::Inst_VOP3__V_CVT_FLR_I32_F32(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_flr_i32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_FLR_I32_F32

    Inst_VOP3__V_CVT_FLR_I32_F32::~Inst_VOP3__V_CVT_FLR_I32_F32()
    {
    } // ~Inst_VOP3__V_CVT_FLR_I32_F32

    // --- description from .arch file ---
    // D.i = (int)floor(S0.f).
    // Lane-wise floor-then-truncate conversion to i32.
    void
    Inst_VOP3__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_CVT_OFF_F32_I4 class methods ---

    Inst_VOP3__V_CVT_OFF_F32_I4::Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_off_f32_i4", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_OFF_F32_I4

    Inst_VOP3__V_CVT_OFF_F32_I4::~Inst_VOP3__V_CVT_OFF_F32_I4()
    {
    } // ~Inst_VOP3__V_CVT_OFF_F32_I4

    // --- description from .arch file ---
    // 4-bit signed int to 32-bit float. Used for interpolation in shader.
    // Unimplemented: executing this opcode panics the simulation.
    void
    Inst_VOP3__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
    {
        // Could not parse sq_uc.arch desc field
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CVT_F32_F64 class methods ---

    Inst_VOP3__V_CVT_F32_F64::Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_f32_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_F32_F64

    Inst_VOP3__V_CVT_F32_F64::~Inst_VOP3__V_CVT_F32_F64()
    {
    } // ~Inst_VOP3__V_CVT_F32_F64

    // --- description from .arch file ---
    // D.f = (float)S0.d.
    // Lane-wise narrowing conversion from f64 to f32.
    void
    Inst_VOP3__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // ABS/NEG bit 0 applies to the single FP source operand
        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_CVT_F64_F32 class methods ---

    Inst_VOP3__V_CVT_F64_F32::Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_f64_f32", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_F64_F32

    Inst_VOP3__V_CVT_F64_F32::~Inst_VOP3__V_CVT_F64_F32()
    {
    } // ~Inst_VOP3__V_CVT_F64_F32

    // --- description from .arch file ---
    // D.d = (double)S0.f.
    // Lane-wise widening conversion from f32 to f64.
    void
    Inst_VOP3__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        // ABS/NEG bit 0 applies to the single FP source operand
        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_CVT_F32_UBYTE0 class methods ---

    Inst_VOP3__V_CVT_F32_UBYTE0::Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte0", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE0

    Inst_VOP3__V_CVT_F32_UBYTE0::~Inst_VOP3__V_CVT_F32_UBYTE0()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE0

    // --- description from .arch file ---
    // D.f = (float)(S0.u[7:0]).
    // Lane-wise conversion of byte 0 (bits [7:0]) of the source to f32.
    // NOTE(review): ABS/NEG modifiers are applied to an unsigned source
    // here while sibling integer ops assert them absent — confirm intended.
    void
    Inst_VOP3__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 7, 0);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_CVT_F32_UBYTE1 class methods ---

    Inst_VOP3__V_CVT_F32_UBYTE1::Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte1", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE1

    Inst_VOP3__V_CVT_F32_UBYTE1::~Inst_VOP3__V_CVT_F32_UBYTE1()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE1

    // --- description from .arch file ---
    // D.f = (float)(S0.u[15:8]).
    // Lane-wise conversion of byte 1 (bits [15:8]) of the source to f32.
    void
    Inst_VOP3__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 15, 8);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_CVT_F32_UBYTE2 class methods ---

    Inst_VOP3__V_CVT_F32_UBYTE2::Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte2", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE2

    Inst_VOP3__V_CVT_F32_UBYTE2::~Inst_VOP3__V_CVT_F32_UBYTE2()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE2

    // --- description from .arch file ---
    // D.f = (float)(S0.u[23:16]).
    // Convert byte 2 (bits 23:16) of the u32 source to f32, per active lane.
    void
    Inst_VOP3__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 23, 16);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_CVT_F32_UBYTE3 class methods ---

    Inst_VOP3__V_CVT_F32_UBYTE3::Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte3", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE3

    Inst_VOP3__V_CVT_F32_UBYTE3::~Inst_VOP3__V_CVT_F32_UBYTE3()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE3

    // --- description from .arch file ---
    // D.f = (float)(S0.u[31:24]).
    // Convert byte 3 (bits 31:24) of the u32 source to f32, per active lane.
    void
    Inst_VOP3__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 31, 24);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_CVT_U32_F64 class methods ---

    Inst_VOP3__V_CVT_U32_F64::Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_u32_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_U32_F64

    Inst_VOP3__V_CVT_U32_F64::~Inst_VOP3__V_CVT_U32_F64()
    {
    } // ~Inst_VOP3__V_CVT_U32_F64

    // --- description from .arch file ---
    // D.u = (unsigned)S0.d.
    // Out-of-range floating point values (including infinity) saturate.
NaN is - // --- converted to 0. - void - Inst_VOP3__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp; - std::frexp(src[lane],&exp); - if (std::isnan(src[lane])) { - vdst[lane] = 0; - } else if (std::isinf(src[lane])) { - if (std::signbit(src[lane])) { - vdst[lane] = 0; - } else { - vdst[lane] = UINT_MAX; - } - } else if (exp > 31) { - vdst[lane] = UINT_MAX; - } else { - vdst[lane] = (VecElemU32)src[lane]; - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_F64_U32 class methods --- - - Inst_VOP3__V_CVT_F64_U32::Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f64_u32", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_CVT_F64_U32 - - Inst_VOP3__V_CVT_F64_U32::~Inst_VOP3__V_CVT_F64_U32() - { - } // ~Inst_VOP3__V_CVT_F64_U32 - - // --- description from .arch file --- - // D.d = (double)S0.u. 
    // Convert each active lane's u32 source to f64 (D.d = (double)S0.u).
    void
    Inst_VOP3__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_TRUNC_F64 class methods ---

    Inst_VOP3__V_TRUNC_F64::Inst_VOP3__V_TRUNC_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_trunc_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_TRUNC_F64

    Inst_VOP3__V_TRUNC_F64::~Inst_VOP3__V_TRUNC_F64()
    {
    } // ~Inst_VOP3__V_TRUNC_F64

    // --- description from .arch file ---
    // D.d = trunc(S0.d), return integer part of S0.d.
    // Round the f64 source toward zero, per active lane.
    void
    Inst_VOP3__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::trunc(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_CEIL_F64 class methods ---

    Inst_VOP3__V_CEIL_F64::Inst_VOP3__V_CEIL_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_ceil_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CEIL_F64

    Inst_VOP3__V_CEIL_F64::~Inst_VOP3__V_CEIL_F64()
    {
    } // ~Inst_VOP3__V_CEIL_F64

    // --- description from .arch file ---
    // D.d = trunc(S0.d);
    // if(S0.d > 0.0 && S0.d != D.d) then D.d += 1.0.
    // Round the f64 source toward +infinity, per active lane
    // (std::ceil matches the trunc-then-adjust description above).
    void
    Inst_VOP3__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_RNDNE_F64 class methods ---

    Inst_VOP3__V_RNDNE_F64::Inst_VOP3__V_RNDNE_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_rndne_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_RNDNE_F64

    Inst_VOP3__V_RNDNE_F64::~Inst_VOP3__V_RNDNE_F64()
    {
    } // ~Inst_VOP3__V_RNDNE_F64

    // --- description from .arch file ---
    // D.d = round_nearest_even(S0.d).
    // Round the f64 source to the nearest integer, ties to even.
    void
    Inst_VOP3__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_FLOOR_F64 class methods ---

    Inst_VOP3__V_FLOOR_F64::Inst_VOP3__V_FLOOR_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_floor_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_FLOOR_F64

    Inst_VOP3__V_FLOOR_F64::~Inst_VOP3__V_FLOOR_F64()
    {
    } // ~Inst_VOP3__V_FLOOR_F64

    // --- description from .arch file ---
    // D.d = trunc(S0.d);
    // if(S0.d < 0.0 && S0.d != D.d) then D.d += -1.0.
- void - Inst_VOP3__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::floor(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FRACT_F32 class methods --- - - Inst_VOP3__V_FRACT_F32::Inst_VOP3__V_FRACT_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_fract_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_FRACT_F32 - - Inst_VOP3__V_FRACT_F32::~Inst_VOP3__V_FRACT_F32() - { - } // ~Inst_VOP3__V_FRACT_F32 - - // --- description from .arch file --- - // D.f = S0.f - floor(S0.f). - void - Inst_VOP3__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF32 int_part(0.0); - vdst[lane] = std::modf(src[lane], &int_part); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_TRUNC_F32 class methods --- - - Inst_VOP3__V_TRUNC_F32::Inst_VOP3__V_TRUNC_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_trunc_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_TRUNC_F32 - - Inst_VOP3__V_TRUNC_F32::~Inst_VOP3__V_TRUNC_F32() - { - } // ~Inst_VOP3__V_TRUNC_F32 - - // --- description from .arch file --- - // D.f = trunc(S0.f), return integer part of S0.f. 
    // Round the f32 source toward zero, per active lane.
    void
    Inst_VOP3__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::trunc(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_CEIL_F32 class methods ---

    Inst_VOP3__V_CEIL_F32::Inst_VOP3__V_CEIL_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_ceil_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CEIL_F32

    Inst_VOP3__V_CEIL_F32::~Inst_VOP3__V_CEIL_F32()
    {
    } // ~Inst_VOP3__V_CEIL_F32

    // --- description from .arch file ---
    // D.f = trunc(S0.f);
    // if(S0.f > 0.0 && S0.f != D.f) then D.f += 1.0.
    // Round the f32 source toward +infinity, per active lane.
    void
    Inst_VOP3__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_RNDNE_F32 class methods ---

    Inst_VOP3__V_RNDNE_F32::Inst_VOP3__V_RNDNE_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_rndne_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_RNDNE_F32

    Inst_VOP3__V_RNDNE_F32::~Inst_VOP3__V_RNDNE_F32()
    {
    } // ~Inst_VOP3__V_RNDNE_F32

    // --- description from .arch file ---
    // D.f = round_nearest_even(S0.f).
    // Round the f32 source to the nearest integer, ties to even.
    void
    Inst_VOP3__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_FLOOR_F32 class methods ---

    Inst_VOP3__V_FLOOR_F32::Inst_VOP3__V_FLOOR_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_floor_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_FLOOR_F32

    Inst_VOP3__V_FLOOR_F32::~Inst_VOP3__V_FLOOR_F32()
    {
    } // ~Inst_VOP3__V_FLOOR_F32

    // --- description from .arch file ---
    // D.f = trunc(S0.f);
    // if(S0.f < 0.0 && S0.f != D.f) then D.f += -1.0.
    // Round the f32 source toward -infinity, per active lane.
    void
    Inst_VOP3__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_EXP_F32 class methods ---

    Inst_VOP3__V_EXP_F32::Inst_VOP3__V_EXP_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_exp_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_EXP_F32

    Inst_VOP3__V_EXP_F32::~Inst_VOP3__V_EXP_F32()
    {
    } // ~Inst_VOP3__V_EXP_F32

    // --- description from .arch file ---
    // D.f = pow(2.0, S0.f).
- void - Inst_VOP3__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::pow(2.0, src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LOG_F32 class methods --- - - Inst_VOP3__V_LOG_F32::Inst_VOP3__V_LOG_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_log_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_LOG_F32 - - Inst_VOP3__V_LOG_F32::~Inst_VOP3__V_LOG_F32() - { - } // ~Inst_VOP3__V_LOG_F32 - - // --- description from .arch file --- - // D.f = log2(S0.f). Base 2 logarithm. - void - Inst_VOP3__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::log2(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_RCP_F32 class methods --- - - Inst_VOP3__V_RCP_F32::Inst_VOP3__V_RCP_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_rcp_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_RCP_F32 - - Inst_VOP3__V_RCP_F32::~Inst_VOP3__V_RCP_F32() - { - } // ~Inst_VOP3__V_RCP_F32 - - // --- description from .arch file --- - // D.f = 1.0 / S0.f. 
    // Reciprocal with IEEE rules and < 1ulp error.
    // Per-lane reciprocal of the f32 source.
    void
    Inst_VOP3__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / src[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_RCP_IFLAG_F32 class methods ---

    Inst_VOP3__V_RCP_IFLAG_F32::Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_rcp_iflag_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_RCP_IFLAG_F32

    Inst_VOP3__V_RCP_IFLAG_F32::~Inst_VOP3__V_RCP_IFLAG_F32()
    {
    } // ~Inst_VOP3__V_RCP_IFLAG_F32

    // --- description from .arch file ---
    // D.f = 1.0 / S0.f. Reciprocal intended for integer division, can raise
    // --- integer DIV_BY_ZERO exception but cannot raise floating-point
    // --- exceptions.
    // Functionally identical to V_RCP_F32 here; exception state is not
    // modeled.
    void
    Inst_VOP3__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / src[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_RSQ_F32 class methods ---

    Inst_VOP3__V_RSQ_F32::Inst_VOP3__V_RSQ_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_rsq_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_RSQ_F32

    Inst_VOP3__V_RSQ_F32::~Inst_VOP3__V_RSQ_F32()
    {
    } // ~Inst_VOP3__V_RSQ_F32

    // --- description from .arch file ---
    // D.f = 1.0 / sqrt(S0.f).
    // Reciprocal square root with IEEE rules.
    // Per-lane reciprocal square root of the f32 source.
    void
    Inst_VOP3__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_RCP_F64 class methods ---

    Inst_VOP3__V_RCP_F64::Inst_VOP3__V_RCP_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_rcp_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_RCP_F64

    Inst_VOP3__V_RCP_F64::~Inst_VOP3__V_RCP_F64()
    {
    } // ~Inst_VOP3__V_RCP_F64

    // --- description from .arch file ---
    // D.d = 1.0 / S0.d.
    // Per-lane reciprocal of the f64 source, with special cases handled
    // explicitly: 1/0 -> +inf, 1/NaN -> NaN, 1/(+-inf) -> +-0.
    void
    Inst_VOP3__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    // Both +0 and -0 map to +inf here.
                    vdst[lane] = +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane])) {
                    // Sign of the zero result follows the sign of the
                    // infinite input.
                    if (std::signbit(src[lane])) {
                        vdst[lane] = -0.0;
                    } else {
                        vdst[lane] = 0.0;
                    }
                } else {
                    vdst[lane] = 1.0 / src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_RSQ_F64 class methods ---

    Inst_VOP3__V_RSQ_F64::Inst_VOP3__V_RSQ_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_rsq_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_RSQ_F64

    Inst_VOP3__V_RSQ_F64::~Inst_VOP3__V_RSQ_F64()
    {
    } // ~Inst_VOP3__V_RSQ_F64

    // --- description from .arch file ---
    // D.d = 1.0 / sqrt(S0.d). See V_RSQ_F32.
    // Per-lane reciprocal square root of the f64 source, with special
    // cases handled explicitly: 0 -> +inf, NaN/negative -> NaN, +inf -> 0.
    void
    Inst_VOP3__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    vdst[lane] = +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane]) && !std::signbit(src[lane])) {
                    vdst[lane] = 0.0;
                } else if (std::signbit(src[lane])) {
                    // sqrt of a negative number is NaN.
                    vdst[lane] = NAN;
                } else {
                    vdst[lane] = 1.0 / std::sqrt(src[lane]);
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_SQRT_F32 class methods ---

    Inst_VOP3__V_SQRT_F32::Inst_VOP3__V_SQRT_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_sqrt_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_SQRT_F32

    Inst_VOP3__V_SQRT_F32::~Inst_VOP3__V_SQRT_F32()
    {
    } // ~Inst_VOP3__V_SQRT_F32

    // --- description from .arch file ---
    // D.f = sqrt(S0.f).
    // Per-lane square root of the f32 source.
    void
    Inst_VOP3__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_SQRT_F64 class methods ---

    Inst_VOP3__V_SQRT_F64::Inst_VOP3__V_SQRT_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_sqrt_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_SQRT_F64

    Inst_VOP3__V_SQRT_F64::~Inst_VOP3__V_SQRT_F64()
    {
    } // ~Inst_VOP3__V_SQRT_F64

    // --- description from .arch file ---
    // D.d = sqrt(S0.d).
    // Per-lane square root of the f64 source.
    void
    Inst_VOP3__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_SIN_F32 class methods ---

    Inst_VOP3__V_SIN_F32::Inst_VOP3__V_SIN_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_sin_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_SIN_F32

    Inst_VOP3__V_SIN_F32::~Inst_VOP3__V_SIN_F32()
    {
    } // ~Inst_VOP3__V_SIN_F32

    // --- description from .arch file ---
    // D.f = sin(S0.f * 2 * PI).
    // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
    // float 0.0.
    // Per-lane sine of the f32 source, interpreted in revolutions
    // (input is scaled by 2*PI; PI is read from the constant register).
    // NOTE(review): the documented out-of-range clamp (|S0.f| > 256 -> 0.0)
    // is not implemented here -- confirm whether that is intentional.
    void
    Inst_VOP3__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();
        pi.read();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::sin(src[lane] * 2 * pi.rawData());
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_COS_F32 class methods ---

    Inst_VOP3__V_COS_F32::Inst_VOP3__V_COS_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cos_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_COS_F32

    Inst_VOP3__V_COS_F32::~Inst_VOP3__V_COS_F32()
    {
    } // ~Inst_VOP3__V_COS_F32

    // --- description from .arch file ---
    // D.f = cos(S0.f * 2 * PI).
    // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
    // float 1.0.
    // Per-lane cosine of the f32 source, interpreted in revolutions.
    // NOTE(review): the documented out-of-range clamp is not implemented
    // here either -- confirm intent.
    void
    Inst_VOP3__V_COS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();
        pi.read();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::cos(src[lane] * 2 * pi.rawData());
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_NOT_B32 class methods ---

    Inst_VOP3__V_NOT_B32::Inst_VOP3__V_NOT_B32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_not_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_NOT_B32

    Inst_VOP3__V_NOT_B32::~Inst_VOP3__V_NOT_B32()
    {
    } // ~Inst_VOP3__V_NOT_B32

    // --- description from .arch file ---
    // D.u = ~S0.u.
    // Input and output modifiers not supported.
    // Per-lane bitwise complement of the u32 source.
    // NOTE(review): abs/neg modifiers are applied despite the "not
    // supported" note above -- matches the surrounding integer ops, but
    // confirm intent.
    void
    Inst_VOP3__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = ~src[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_BFREV_B32 class methods ---

    Inst_VOP3__V_BFREV_B32::Inst_VOP3__V_BFREV_B32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_bfrev_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFREV_B32

    Inst_VOP3__V_BFREV_B32::~Inst_VOP3__V_BFREV_B32()
    {
    } // ~Inst_VOP3__V_BFREV_B32

    // --- description from .arch file ---
    // D.u[31:0] = S0.u[0:31], bitfield reverse.
    // Input and output modifiers not supported.
    // Per-lane bit reversal of the u32 source (no modifiers applied here).
    void
    Inst_VOP3__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = reverseBits(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_FFBH_U32 class methods ---

    Inst_VOP3__V_FFBH_U32::Inst_VOP3__V_FFBH_U32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_ffbh_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_FFBH_U32

    Inst_VOP3__V_FFBH_U32::~Inst_VOP3__V_FFBH_U32()
    {
    } // ~Inst_VOP3__V_FFBH_U32

    // --- description from .arch file ---
    // D.u = position of first 1 in S0.u from MSB;
    // D.u = 0xffffffff if S0.u == 0.
    // Per-lane find-first-one from the MSB (0xffffffff when src is 0).
    void
    Inst_VOP3__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = findFirstOneMsb(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_FFBL_B32 class methods ---

    Inst_VOP3__V_FFBL_B32::Inst_VOP3__V_FFBL_B32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_ffbl_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_FFBL_B32

    Inst_VOP3__V_FFBL_B32::~Inst_VOP3__V_FFBL_B32()
    {
    } // ~Inst_VOP3__V_FFBL_B32

    // --- description from .arch file ---
    // D.u = position of first 1 in S0.u from LSB;
    // D.u = 0xffffffff if S0.u == 0.
    // Per-lane find-first-one from the LSB (0xffffffff when src is 0).
    void
    Inst_VOP3__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = findFirstOne(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_FFBH_I32 class methods ---

    Inst_VOP3__V_FFBH_I32::Inst_VOP3__V_FFBH_I32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_ffbh_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_FFBH_I32

    Inst_VOP3__V_FFBH_I32::~Inst_VOP3__V_FFBH_I32()
    {
    } // ~Inst_VOP3__V_FFBH_I32

    // --- description from .arch file ---
    // D.u = position of first bit different from sign bit in S0.i from MSB;
    // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
    // Per-lane position of the first bit that differs from the sign bit,
    // searching from the MSB (0xffffffff when src is 0 or all-ones).
    void
    Inst_VOP3__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = firstOppositeSignBit(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_FREXP_EXP_I32_F64 class methods ---

    Inst_VOP3__V_FREXP_EXP_I32_F64::Inst_VOP3__V_FREXP_EXP_I32_F64(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_frexp_exp_i32_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_FREXP_EXP_I32_F64

    Inst_VOP3__V_FREXP_EXP_I32_F64::~Inst_VOP3__V_FREXP_EXP_I32_F64()
    {
    } // ~Inst_VOP3__V_FREXP_EXP_I32_F64

    // --- description from .arch file ---
    // See V_FREXP_EXP_I32_F32.
    // Per-lane binary exponent of the f64 source; 0 for inf/NaN inputs.
    void
    Inst_VOP3__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else {
                    VecElemI32 exp(0);
                    std::frexp(src[lane], &exp);
                    vdst[lane] = exp;
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_FREXP_MANT_F64 class methods ---

    Inst_VOP3__V_FREXP_MANT_F64::Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_frexp_mant_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_FREXP_MANT_F64

    Inst_VOP3__V_FREXP_MANT_F64::~Inst_VOP3__V_FREXP_MANT_F64()
    {
    } // ~Inst_VOP3__V_FREXP_MANT_F64

    // ---
description from .arch file --- - // See V_FREXP_MANT_F32. - void - Inst_VOP3__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI32 exp(0); - vdst[lane] = std::frexp(src[lane], &exp); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FRACT_F64 class methods --- - - Inst_VOP3__V_FRACT_F64::Inst_VOP3__V_FRACT_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_fract_f64", false) - { - setFlag(ALU); - setFlag(F64); - } // Inst_VOP3__V_FRACT_F64 - - Inst_VOP3__V_FRACT_F64::~Inst_VOP3__V_FRACT_F64() - { - } // ~Inst_VOP3__V_FRACT_F64 - - // --- description from .arch file --- - // See V_FRACT_F32. - void - Inst_VOP3__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src(gpuDynInst, extData.SRC0); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF32 int_part(0.0); - vdst[lane] = std::modf(src[lane], &int_part); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FREXP_EXP_I32_F32 class methods --- - - Inst_VOP3__V_FREXP_EXP_I32_F32::Inst_VOP3__V_FREXP_EXP_I32_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_frexp_exp_i32_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_FREXP_EXP_I32_F32 - - Inst_VOP3__V_FREXP_EXP_I32_F32::~Inst_VOP3__V_FREXP_EXP_I32_F32() - { - } // ~Inst_VOP3__V_FREXP_EXP_I32_F32 - - // --- description from .arch file --- - // if(S0.f == INF || S0.f == NAN) 
    // then D.i = 0;
    // else D.i = TwosComplement(Exponent(S0.f) - 127 + 1).
    // Returns exponent of single precision float input, such that S0.f =
    // significand * (2 ** exponent). See also FREXP_MANT_F32, which returns
    // the significand.
    // Per-lane binary exponent of the f32 source; 0 for inf/NaN inputs.
    void
    Inst_VOP3__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane])|| std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else {
                    VecElemI32 exp(0);
                    std::frexp(src[lane], &exp);
                    vdst[lane] = exp;
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_FREXP_MANT_F32 class methods ---

    Inst_VOP3__V_FREXP_MANT_F32::Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_frexp_mant_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_FREXP_MANT_F32

    Inst_VOP3__V_FREXP_MANT_F32::~Inst_VOP3__V_FREXP_MANT_F32()
    {
    } // ~Inst_VOP3__V_FREXP_MANT_F32

    // --- description from .arch file ---
    // if(S0.f == INF || S0.f == NAN) then D.f = S0.f;
    // else D.f = Mantissa(S0.f).
    // Result range is in (-1.0,-0.5][0.5,1.0) in normal cases. Returns binary
    // --- significand of single precision float input, such that S0.f =
    // --- significand * (2 ** exponent). See also FREXP_EXP_I32_F32, which
    // --- returns integer exponent.
- void - Inst_VOP3__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isinf(src[lane]) || std::isnan(src[lane])) { - vdst[lane] = src[lane]; - } else { - VecElemI32 exp(0); - vdst[lane] = std::frexp(src[lane], &exp); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CLREXCP class methods --- - - Inst_VOP3__V_CLREXCP::Inst_VOP3__V_CLREXCP(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_clrexcp", false) - { - } // Inst_VOP3__V_CLREXCP - - Inst_VOP3__V_CLREXCP::~Inst_VOP3__V_CLREXCP() - { - } // ~Inst_VOP3__V_CLREXCP - - // --- description from .arch file --- - // Clear wave's exception state in SIMD (SP). - void - Inst_VOP3__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CVT_F16_U16 class methods --- - - Inst_VOP3__V_CVT_F16_U16::Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f16_u16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CVT_F16_U16 - - Inst_VOP3__V_CVT_F16_U16::~Inst_VOP3__V_CVT_F16_U16() - { - } // ~Inst_VOP3__V_CVT_F16_U16 - - // --- description from .arch file --- - // D.f16 = uint16_to_flt16(S.u16). - // Supports denormals, rounding, exception flags and saturation. 
- void - Inst_VOP3__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CVT_F16_I16 class methods --- - - Inst_VOP3__V_CVT_F16_I16::Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_f16_i16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CVT_F16_I16 - - Inst_VOP3__V_CVT_F16_I16::~Inst_VOP3__V_CVT_F16_I16() - { - } // ~Inst_VOP3__V_CVT_F16_I16 - - // --- description from .arch file --- - // D.f16 = int16_to_flt16(S.i16). - // Supports denormals, rounding, exception flags and saturation. - void - Inst_VOP3__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CVT_U16_F16 class methods --- - - Inst_VOP3__V_CVT_U16_F16::Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_u16_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CVT_U16_F16 - - Inst_VOP3__V_CVT_U16_F16::~Inst_VOP3__V_CVT_U16_F16() - { - } // ~Inst_VOP3__V_CVT_U16_F16 - - // --- description from .arch file --- - // D.u16 = flt16_to_uint16(S.f16). - // Supports rounding, exception flags and saturation. - void - Inst_VOP3__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CVT_I16_F16 class methods --- - - Inst_VOP3__V_CVT_I16_F16::Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_i16_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CVT_I16_F16 - - Inst_VOP3__V_CVT_I16_F16::~Inst_VOP3__V_CVT_I16_F16() - { - } // ~Inst_VOP3__V_CVT_I16_F16 - - // --- description from .arch file --- - // D.i16 = flt16_to_int16(S.f16). - // Supports rounding, exception flags and saturation. 
- void - Inst_VOP3__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_RCP_F16 class methods --- - - Inst_VOP3__V_RCP_F16::Inst_VOP3__V_RCP_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_rcp_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_RCP_F16 - - Inst_VOP3__V_RCP_F16::~Inst_VOP3__V_RCP_F16() - { - } // ~Inst_VOP3__V_RCP_F16 - - // --- description from .arch file --- - // if(S0.f16 == 1.0f) - // D.f16 = 1.0f; - // else - // D.f16 = ApproximateRecip(S0.f16). - void - Inst_VOP3__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_SQRT_F16 class methods --- - - Inst_VOP3__V_SQRT_F16::Inst_VOP3__V_SQRT_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sqrt_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_SQRT_F16 - - Inst_VOP3__V_SQRT_F16::~Inst_VOP3__V_SQRT_F16() - { - } // ~Inst_VOP3__V_SQRT_F16 - - // --- description from .arch file --- - // if(S0.f16 == 1.0f) - // D.f16 = 1.0f; - // else - // D.f16 = ApproximateSqrt(S0.f16). - void - Inst_VOP3__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_RSQ_F16 class methods --- - - Inst_VOP3__V_RSQ_F16::Inst_VOP3__V_RSQ_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_rsq_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_RSQ_F16 - - Inst_VOP3__V_RSQ_F16::~Inst_VOP3__V_RSQ_F16() - { - } // ~Inst_VOP3__V_RSQ_F16 - - // --- description from .arch file --- - // if(S0.f16 == 1.0f) - // D.f16 = 1.0f; - // else - // D.f16 = ApproximateRecipSqrt(S0.f16). 
- void - Inst_VOP3__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_LOG_F16 class methods --- - - Inst_VOP3__V_LOG_F16::Inst_VOP3__V_LOG_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_log_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_LOG_F16 - - Inst_VOP3__V_LOG_F16::~Inst_VOP3__V_LOG_F16() - { - } // ~Inst_VOP3__V_LOG_F16 - - // --- description from .arch file --- - // if(S0.f16 == 1.0f) - // D.f16 = 0.0f; - // else - // D.f16 = ApproximateLog2(S0.f16). - void - Inst_VOP3__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_EXP_F16 class methods --- - - Inst_VOP3__V_EXP_F16::Inst_VOP3__V_EXP_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_exp_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_EXP_F16 - - Inst_VOP3__V_EXP_F16::~Inst_VOP3__V_EXP_F16() - { - } // ~Inst_VOP3__V_EXP_F16 - - // --- description from .arch file --- - // if(S0.f16 == 0.0f) - // D.f16 = 1.0f; - // else - // D.f16 = Approximate2ToX(S0.f16). - void - Inst_VOP3__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_FREXP_MANT_F16 class methods --- - - Inst_VOP3__V_FREXP_MANT_F16::Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_frexp_mant_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_FREXP_MANT_F16 - - Inst_VOP3__V_FREXP_MANT_F16::~Inst_VOP3__V_FREXP_MANT_F16() - { - } // ~Inst_VOP3__V_FREXP_MANT_F16 - - // --- description from .arch file --- - // if(S0.f16 == +-INF || S0.f16 == NAN) - // D.f16 = S0.f16; - // else - // D.f16 = mantissa(S0.f16). - // Result range is (-1.0,-0.5][0.5,1.0). - // C math library frexp function. - // Returns binary significand of half precision float input, such that the - // original single float = significand * (2 ** exponent). 
- void - Inst_VOP3__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_FREXP_EXP_I16_F16 class methods --- - - Inst_VOP3__V_FREXP_EXP_I16_F16::Inst_VOP3__V_FREXP_EXP_I16_F16( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_frexp_exp_i16_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_FREXP_EXP_I16_F16 - - Inst_VOP3__V_FREXP_EXP_I16_F16::~Inst_VOP3__V_FREXP_EXP_I16_F16() - { - } // ~Inst_VOP3__V_FREXP_EXP_I16_F16 - - // --- description from .arch file --- - // if(S0.f16 == +-INF || S0.f16 == NAN) - // D.i16 = 0; - // else - // D.i16 = 2s_complement(exponent(S0.f16) - 15 + 1). - // C math library frexp function. - // Returns exponent of half precision float input, such that the - // original single float = significand * (2 ** exponent). - void - Inst_VOP3__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_FLOOR_F16 class methods --- - - Inst_VOP3__V_FLOOR_F16::Inst_VOP3__V_FLOOR_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_floor_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_FLOOR_F16 - - Inst_VOP3__V_FLOOR_F16::~Inst_VOP3__V_FLOOR_F16() - { - } // ~Inst_VOP3__V_FLOOR_F16 - - // --- description from .arch file --- - // D.f16 = trunc(S0.f16); - // if(S0.f16 < 0.0f && S0.f16 != D.f16) then D.f16 -= 1.0f. - void - Inst_VOP3__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CEIL_F16 class methods --- - - Inst_VOP3__V_CEIL_F16::Inst_VOP3__V_CEIL_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_ceil_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_CEIL_F16 - - Inst_VOP3__V_CEIL_F16::~Inst_VOP3__V_CEIL_F16() - { - } // ~Inst_VOP3__V_CEIL_F16 - - // --- description from .arch file --- - // D.f16 = trunc(S0.f16); - // if(S0.f16 > 0.0f && S0.f16 != D.f16) then D.f16 += 1.0f. 
- void - Inst_VOP3__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_TRUNC_F16 class methods --- - - Inst_VOP3__V_TRUNC_F16::Inst_VOP3__V_TRUNC_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_trunc_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_TRUNC_F16 - - Inst_VOP3__V_TRUNC_F16::~Inst_VOP3__V_TRUNC_F16() - { - } // ~Inst_VOP3__V_TRUNC_F16 - - // --- description from .arch file --- - // D.f16 = trunc(S0.f16). - // Round-to-zero semantics. - void - Inst_VOP3__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_RNDNE_F16 class methods --- - - Inst_VOP3__V_RNDNE_F16::Inst_VOP3__V_RNDNE_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_rndne_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_RNDNE_F16 - - Inst_VOP3__V_RNDNE_F16::~Inst_VOP3__V_RNDNE_F16() - { - } // ~Inst_VOP3__V_RNDNE_F16 - - // --- description from .arch file --- - // D.f16 = FLOOR(S0.f16 + 0.5f); - // if(floor(S0.f16) is even && fract(S0.f16) == 0.5f) then D.f16 -= 1.0f. - // Round-to-nearest-even semantics. - void - Inst_VOP3__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_FRACT_F16 class methods --- - - Inst_VOP3__V_FRACT_F16::Inst_VOP3__V_FRACT_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_fract_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_FRACT_F16 - - Inst_VOP3__V_FRACT_F16::~Inst_VOP3__V_FRACT_F16() - { - } // ~Inst_VOP3__V_FRACT_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 + -floor(S0.f16). 
- void - Inst_VOP3__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_SIN_F16 class methods --- - - Inst_VOP3__V_SIN_F16::Inst_VOP3__V_SIN_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sin_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_SIN_F16 - - Inst_VOP3__V_SIN_F16::~Inst_VOP3__V_SIN_F16() - { - } // ~Inst_VOP3__V_SIN_F16 - - // --- description from .arch file --- - // D.f16 = sin(S0.f16 * 2 * PI). - void - Inst_VOP3__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_COS_F16 class methods --- - - Inst_VOP3__V_COS_F16::Inst_VOP3__V_COS_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cos_f16", false) - { - setFlag(ALU); - setFlag(F16); - } // Inst_VOP3__V_COS_F16 - - Inst_VOP3__V_COS_F16::~Inst_VOP3__V_COS_F16() - { - } // ~Inst_VOP3__V_COS_F16 - - // --- description from .arch file --- - // D.f16 = cos(S0.f16 * 2 * PI). - void - Inst_VOP3__V_COS_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_EXP_LEGACY_F32 class methods --- - - Inst_VOP3__V_EXP_LEGACY_F32::Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_exp_legacy_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_EXP_LEGACY_F32 - - Inst_VOP3__V_EXP_LEGACY_F32::~Inst_VOP3__V_EXP_LEGACY_F32() - { - } // ~Inst_VOP3__V_EXP_LEGACY_F32 - - // --- description from .arch file --- - // D.f = pow(2.0, S0.f) with legacy semantics. 
- void - Inst_VOP3__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - if (instData.ABS & 0x1) { - src.absModifier(); - } - - if (extData.NEG & 0x1) { - src.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::pow(2.0, src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LOG_LEGACY_F32 class methods --- - - Inst_VOP3__V_LOG_LEGACY_F32::Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_log_legacy_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_LOG_LEGACY_F32 - - Inst_VOP3__V_LOG_LEGACY_F32::~Inst_VOP3__V_LOG_LEGACY_F32() - { - } // ~Inst_VOP3__V_LOG_LEGACY_F32 - - // --- description from .arch file --- - // D.f = log2(S0.f). Base 2 logarithm with legacy semantics. 
- void - Inst_VOP3__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src(gpuDynInst, extData.SRC0); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src.readSrc(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::log2(src[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAD_LEGACY_F32 class methods --- - - Inst_VOP3__V_MAD_LEGACY_F32::Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mad_legacy_f32", false) - { - setFlag(ALU); - setFlag(F32); - setFlag(MAD); - } // Inst_VOP3__V_MAD_LEGACY_F32 - - Inst_VOP3__V_MAD_LEGACY_F32::~Inst_VOP3__V_MAD_LEGACY_F32() - { - } // ~Inst_VOP3__V_MAD_LEGACY_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f + S2.f (DX9 rules, 0.0 * x = 0.0). - void - Inst_VOP3__V_MAD_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAD_F32 class methods --- - - Inst_VOP3__V_MAD_F32::Inst_VOP3__V_MAD_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mad_f32", false) - { - setFlag(ALU); - setFlag(F32); - setFlag(MAD); - } 
// Inst_VOP3__V_MAD_F32 - - Inst_VOP3__V_MAD_F32::~Inst_VOP3__V_MAD_F32() - { - } // ~Inst_VOP3__V_MAD_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f + S2.f. - void - Inst_VOP3__V_MAD_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAD_I32_I24 class methods --- - - Inst_VOP3__V_MAD_I32_I24::Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mad_i32_i24", false) - { - setFlag(ALU); - setFlag(MAD); - } // Inst_VOP3__V_MAD_I32_I24 - - Inst_VOP3__V_MAD_I32_I24::~Inst_VOP3__V_MAD_I32_I24() - { - } // ~Inst_VOP3__V_MAD_I32_I24 - - // --- description from .arch file --- - // D.i = S0.i[23:0] * S1.i[23:0] + S2.i. 
- void - Inst_VOP3__V_MAD_I32_I24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = sext<24>(bits(src0[lane], 23, 0)) - * sext<24>(bits(src1[lane], 23, 0)) + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAD_U32_U24 class methods --- - - Inst_VOP3__V_MAD_U32_U24::Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mad_u32_u24", false) - { - setFlag(ALU); - setFlag(MAD); - } // Inst_VOP3__V_MAD_U32_U24 - - Inst_VOP3__V_MAD_U32_U24::~Inst_VOP3__V_MAD_U32_U24() - { - } // ~Inst_VOP3__V_MAD_U32_U24 - - // --- description from .arch file --- - // D.u = S0.u[23:0] * S1.u[23:0] + S2.u. 
- void - Inst_VOP3__V_MAD_U32_U24::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0) - + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CUBEID_F32 class methods --- - - Inst_VOP3__V_CUBEID_F32::Inst_VOP3__V_CUBEID_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cubeid_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CUBEID_F32 - - Inst_VOP3__V_CUBEID_F32::~Inst_VOP3__V_CUBEID_F32() - { - } // ~Inst_VOP3__V_CUBEID_F32 - - // --- description from .arch file --- - // D.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). XYZ coordinate is given in - // --- (S0.f, S1.f, S2.f). - void - Inst_VOP3__V_CUBEID_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CUBESC_F32 class methods --- - - Inst_VOP3__V_CUBESC_F32::Inst_VOP3__V_CUBESC_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cubesc_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CUBESC_F32 - - Inst_VOP3__V_CUBESC_F32::~Inst_VOP3__V_CUBESC_F32() - { - } // ~Inst_VOP3__V_CUBESC_F32 - - // --- description from .arch file --- - // D.f = cubemap S coordinate. XYZ coordinate is given in (S0.f, S1.f, - // S2.f). 
- void - Inst_VOP3__V_CUBESC_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CUBETC_F32 class methods --- - - Inst_VOP3__V_CUBETC_F32::Inst_VOP3__V_CUBETC_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cubetc_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CUBETC_F32 - - Inst_VOP3__V_CUBETC_F32::~Inst_VOP3__V_CUBETC_F32() - { - } // ~Inst_VOP3__V_CUBETC_F32 - - // --- description from .arch file --- - // D.f = cubemap T coordinate. XYZ coordinate is given in (S0.f, S1.f, - // S2.f). - void - Inst_VOP3__V_CUBETC_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_CUBEMA_F32 class methods --- - - Inst_VOP3__V_CUBEMA_F32::Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cubema_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CUBEMA_F32 - - Inst_VOP3__V_CUBEMA_F32::~Inst_VOP3__V_CUBEMA_F32() - { - } // ~Inst_VOP3__V_CUBEMA_F32 - - // --- description from .arch file --- - // D.f = 2.0 * cubemap major axis. XYZ coordinate is given in (S0.f, S1.f, - // --- S2.f). 
- void - Inst_VOP3__V_CUBEMA_F32::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_BFE_U32 class methods --- - - Inst_VOP3__V_BFE_U32::Inst_VOP3__V_BFE_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_bfe_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_BFE_U32 - - Inst_VOP3__V_BFE_U32::~Inst_VOP3__V_BFE_U32() - { - } // ~Inst_VOP3__V_BFE_U32 - - // --- description from .arch file --- - // D.u = (S0.u>>S1.u[4:0]) & ((1<wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0)) - & ((1 << bits(src2[lane], 4, 0)) - 1); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_BFE_I32 class methods --- - - Inst_VOP3__V_BFE_I32::Inst_VOP3__V_BFE_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_bfe_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_BFE_I32 - - Inst_VOP3__V_BFE_I32::~Inst_VOP3__V_BFE_I32() - { - } // ~Inst_VOP3__V_BFE_I32 - - // --- description from .arch file --- - // D.i = (S0.i>>S1.u[4:0]) & ((1<wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - 
assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0)) - & ((1 << bits(src2[lane], 4, 0)) - 1); - - // Above extracted a signed int of size src2 bits which needs - // to be signed-extended. Check if the MSB of our src2-bit - // integer is 1, and sign extend it is. - if (vdst[lane] >> (bits(src2[lane], 4, 0) - 1)) { - vdst[lane] |= 0xffffffff << bits(src2[lane], 4, 0); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_BFI_B32 class methods --- - - Inst_VOP3__V_BFI_B32::Inst_VOP3__V_BFI_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_bfi_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_BFI_B32 - - Inst_VOP3__V_BFI_B32::~Inst_VOP3__V_BFI_B32() - { - } // ~Inst_VOP3__V_BFI_B32 - - // --- description from .arch file --- - // D.u = (S0.u & S1.u) | (~S0.u & S2.u); bitfield insert. 
- void - Inst_VOP3__V_BFI_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (src0[lane] & src1[lane]) | (~src0[lane] - & src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FMA_F32 class methods --- - - Inst_VOP3__V_FMA_F32::Inst_VOP3__V_FMA_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_fma_f32", false) - { - setFlag(ALU); - setFlag(F32); - setFlag(FMA); - } // Inst_VOP3__V_FMA_F32 - - Inst_VOP3__V_FMA_F32::~Inst_VOP3__V_FMA_F32() - { - } // ~Inst_VOP3__V_FMA_F32 - - // --- description from .arch file --- - // D.f = S0.f * S1.f + S2.f. 
- void - Inst_VOP3__V_FMA_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_FMA_F64 class methods --- - - Inst_VOP3__V_FMA_F64::Inst_VOP3__V_FMA_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_fma_f64", false) - { - setFlag(ALU); - setFlag(F64); - setFlag(FMA); - } // Inst_VOP3__V_FMA_F64 - - Inst_VOP3__V_FMA_F64::~Inst_VOP3__V_FMA_F64() - { - } // ~Inst_VOP3__V_FMA_F64 - - // --- description from .arch file --- - // D.d = S0.d * S1.d + S2.d. 
- void - Inst_VOP3__V_FMA_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LERP_U8 class methods --- - - Inst_VOP3__V_LERP_U8::Inst_VOP3__V_LERP_U8(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_lerp_u8", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LERP_U8 - - Inst_VOP3__V_LERP_U8::~Inst_VOP3__V_LERP_U8() - { - } // ~Inst_VOP3__V_LERP_U8 - - // --- description from .arch file --- - // D.u = ((S0.u[31:24] + S1.u[31:24] + S2.u[24]) >> 1) << 24 - // D.u += ((S0.u[23:16] + S1.u[23:16] + S2.u[16]) >> 1) << 16; - // D.u += ((S0.u[15:8] + S1.u[15:8] + S2.u[8]) >> 1) << 8; - // D.u += ((S0.u[7:0] + S1.u[7:0] + S2.u[0]) >> 1). - // Unsigned 8-bit pixel average on packed unsigned bytes (linear - // --- interpolation). S2 acts as a round mode; if set, 0.5 rounds up, - // --- otherwise 0.5 truncates. 
- void - Inst_VOP3__V_LERP_U8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = ((bits(src0[lane], 31, 24) - + bits(src1[lane], 31, 24) + bits(src2[lane], 24)) >> 1) - << 24; - vdst[lane] += ((bits(src0[lane], 23, 16) - + bits(src1[lane], 23, 16) + bits(src2[lane], 16)) >> 1) - << 16; - vdst[lane] += ((bits(src0[lane], 15, 8) - + bits(src1[lane], 15, 8) + bits(src2[lane], 8)) >> 1) - << 8; - vdst[lane] += ((bits(src0[lane], 7, 0) + bits(src1[lane], 7, 0) - + bits(src2[lane], 0)) >> 1); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ALIGNBIT_B32 class methods --- - - Inst_VOP3__V_ALIGNBIT_B32::Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_alignbit_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ALIGNBIT_B32 - - Inst_VOP3__V_ALIGNBIT_B32::~Inst_VOP3__V_ALIGNBIT_B32() - { - } // ~Inst_VOP3__V_ALIGNBIT_B32 - - // --- description from .arch file --- - // D.u = ({S0,S1} >> S2.u[4:0]) & 0xffffffff. 
- void - Inst_VOP3__V_ALIGNBIT_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32) - | (VecElemU64)src1[lane]); - vdst[lane] = (VecElemU32)((src_0_1 - >> (VecElemU64)bits(src2[lane], 4, 0)) & 0xffffffff); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ALIGNBYTE_B32 class methods --- - - Inst_VOP3__V_ALIGNBYTE_B32::Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_alignbyte_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ALIGNBYTE_B32 - - Inst_VOP3__V_ALIGNBYTE_B32::~Inst_VOP3__V_ALIGNBYTE_B32() - { - } // ~Inst_VOP3__V_ALIGNBYTE_B32 - - // --- description from .arch file --- - // D.u = ({S0,S1} >> (8*S2.u[4:0])) & 0xffffffff. 
- void - Inst_VOP3__V_ALIGNBYTE_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32) - | (VecElemU64)src1[lane]); - vdst[lane] = (VecElemU32)((src_0_1 - >> (8ULL * (VecElemU64)bits(src2[lane], 4, 0))) - & 0xffffffff); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN3_F32 class methods --- - - Inst_VOP3__V_MIN3_F32::Inst_VOP3__V_MIN3_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min3_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MIN3_F32 - - Inst_VOP3__V_MIN3_F32::~Inst_VOP3__V_MIN3_F32() - { - } // ~Inst_VOP3__V_MIN3_F32 - - // --- description from .arch file --- - // D.f = min(S0.f, S1.f, S2.f). 
- void - Inst_VOP3__V_MIN3_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF32 min_0_1 = std::fmin(src0[lane], src1[lane]); - vdst[lane] = std::fmin(min_0_1, src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN3_I32 class methods --- - - Inst_VOP3__V_MIN3_I32::Inst_VOP3__V_MIN3_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min3_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MIN3_I32 - - Inst_VOP3__V_MIN3_I32::~Inst_VOP3__V_MIN3_I32() - { - } // ~Inst_VOP3__V_MIN3_I32 - - // --- description from .arch file --- - // D.i = min(S0.i, S1.i, S2.i). 
- void - Inst_VOP3__V_MIN3_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI32 min_0_1 = std::min(src0[lane], src1[lane]); - vdst[lane] = std::min(min_0_1, src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MIN3_U32 class methods --- - - Inst_VOP3__V_MIN3_U32::Inst_VOP3__V_MIN3_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_min3_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MIN3_U32 - - Inst_VOP3__V_MIN3_U32::~Inst_VOP3__V_MIN3_U32() - { - } // ~Inst_VOP3__V_MIN3_U32 - - // --- description from .arch file --- - // D.u = min(S0.u, S1.u, S2.u). 
- void - Inst_VOP3__V_MIN3_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU32 min_0_1 = std::min(src0[lane], src1[lane]); - vdst[lane] = std::min(min_0_1, src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX3_F32 class methods --- - - Inst_VOP3__V_MAX3_F32::Inst_VOP3__V_MAX3_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max3_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MAX3_F32 - - Inst_VOP3__V_MAX3_F32::~Inst_VOP3__V_MAX3_F32() - { - } // ~Inst_VOP3__V_MAX3_F32 - - // --- description from .arch file --- - // D.f = max(S0.f, S1.f, S2.f). 
- void - Inst_VOP3__V_MAX3_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemF32 max_0_1 = std::fmax(src0[lane], src1[lane]); - vdst[lane] = std::fmax(max_0_1, src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX3_I32 class methods --- - - Inst_VOP3__V_MAX3_I32::Inst_VOP3__V_MAX3_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max3_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX3_I32 - - Inst_VOP3__V_MAX3_I32::~Inst_VOP3__V_MAX3_I32() - { - } // ~Inst_VOP3__V_MAX3_I32 - - // --- description from .arch file --- - // D.i = max(S0.i, S1.i, S2.i). 
- void - Inst_VOP3__V_MAX3_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemI32 max_0_1 = std::max(src0[lane], src1[lane]); - vdst[lane] = std::max(max_0_1, src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAX3_U32 class methods --- - - Inst_VOP3__V_MAX3_U32::Inst_VOP3__V_MAX3_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_max3_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MAX3_U32 - - Inst_VOP3__V_MAX3_U32::~Inst_VOP3__V_MAX3_U32() - { - } // ~Inst_VOP3__V_MAX3_U32 - - // --- description from .arch file --- - // D.u = max(S0.u, S1.u, S2.u). 
- void - Inst_VOP3__V_MAX3_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - VecElemU32 max_0_1 = std::max(src0[lane], src1[lane]); - vdst[lane] = std::max(max_0_1, src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MED3_F32 class methods --- - - Inst_VOP3__V_MED3_F32::Inst_VOP3__V_MED3_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_med3_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_MED3_F32 - - Inst_VOP3__V_MED3_F32::~Inst_VOP3__V_MED3_F32() - { - } // ~Inst_VOP3__V_MED3_F32 - - // --- description from .arch file --- - // D.f = median(S0.f, S1.f, S2.f). 
- void - Inst_VOP3__V_MED3_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = median(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MED3_I32 class methods --- - - Inst_VOP3__V_MED3_I32::Inst_VOP3__V_MED3_I32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_med3_i32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MED3_I32 - - Inst_VOP3__V_MED3_I32::~Inst_VOP3__V_MED3_I32() - { - } // ~Inst_VOP3__V_MED3_I32 - - // --- description from .arch file --- - // D.i = median(S0.i, S1.i, S2.i). 
- void - Inst_VOP3__V_MED3_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandI32 src2(gpuDynInst, extData.SRC2); - VecOperandI32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = median(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MED3_U32 class methods --- - - Inst_VOP3__V_MED3_U32::Inst_VOP3__V_MED3_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_med3_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MED3_U32 - - Inst_VOP3__V_MED3_U32::~Inst_VOP3__V_MED3_U32() - { - } // ~Inst_VOP3__V_MED3_U32 - - // --- description from .arch file --- - // D.u = median(S0.u, S1.u, S2.u). 
- void - Inst_VOP3__V_MED3_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = median(src0[lane], src1[lane], src2[lane]); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SAD_U8 class methods --- - - Inst_VOP3__V_SAD_U8::Inst_VOP3__V_SAD_U8(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sad_u8", false) - { - setFlag(ALU); - } // Inst_VOP3__V_SAD_U8 - - Inst_VOP3__V_SAD_U8::~Inst_VOP3__V_SAD_U8() - { - } // ~Inst_VOP3__V_SAD_U8 - - // --- description from .arch file --- - // D.u = abs(S0.i[31:24] - S1.i[31:24]) + abs(S0.i[23:16] - S1.i[23:16]) + - // abs(S0.i[15:8] - S1.i[15:8]) + abs(S0.i[7:0] - S1.i[7:0]) + S2.u. - // Sum of absolute differences with accumulation, overflow into upper bits - // is allowed. 
- void - Inst_VOP3__V_SAD_U8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::abs(bits(src0[lane], 31, 24) - - bits(src1[lane], 31, 24)) - + std::abs(bits(src0[lane], 23, 16) - - bits(src1[lane], 23, 16)) - + std::abs(bits(src0[lane], 15, 8) - - bits(src1[lane], 15, 8)) - + std::abs(bits(src0[lane], 7, 0) - - bits(src1[lane], 7, 0)) + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SAD_HI_U8 class methods --- - - Inst_VOP3__V_SAD_HI_U8::Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sad_hi_u8", false) - { - setFlag(ALU); - } // Inst_VOP3__V_SAD_HI_U8 - - Inst_VOP3__V_SAD_HI_U8::~Inst_VOP3__V_SAD_HI_U8() - { - } // ~Inst_VOP3__V_SAD_HI_U8 - - // --- description from .arch file --- - // D.u = (SAD_U8(S0, S1, 0) << 16) + S2.u. - // Sum of absolute differences with accumulation, overflow is lost. 
- void - Inst_VOP3__V_SAD_HI_U8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (((bits(src0[lane], 31, 24) - - bits(src1[lane], 31, 24)) + (bits(src0[lane], 23, 16) - - bits(src1[lane], 23, 16)) + (bits(src0[lane], 15, 8) - - bits(src1[lane], 15, 8)) + (bits(src0[lane], 7, 0) - - bits(src1[lane], 7, 0))) << 16) + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SAD_U16 class methods --- - - Inst_VOP3__V_SAD_U16::Inst_VOP3__V_SAD_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sad_u16", false) - { - setFlag(ALU); - } // Inst_VOP3__V_SAD_U16 - - Inst_VOP3__V_SAD_U16::~Inst_VOP3__V_SAD_U16() - { - } // ~Inst_VOP3__V_SAD_U16 - - // --- description from .arch file --- - // D.u = abs(S0.i[31:16] - S1.i[31:16]) + abs(S0.i[15:0] - S1.i[15:0]) - // + S2.u. - // Word SAD with accumulation. 
- void - Inst_VOP3__V_SAD_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::abs(bits(src0[lane], 31, 16) - - bits(src1[lane], 31, 16)) - + std::abs(bits(src0[lane], 15, 0) - - bits(src1[lane], 15, 0)) + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_SAD_U32 class methods --- - - Inst_VOP3__V_SAD_U32::Inst_VOP3__V_SAD_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_sad_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_SAD_U32 - - Inst_VOP3__V_SAD_U32::~Inst_VOP3__V_SAD_U32() - { - } // ~Inst_VOP3__V_SAD_U32 - - // --- description from .arch file --- - // D.u = abs(S0.i - S1.i) + S2.u. - // Dword SAD with accumulation. 
- void - Inst_VOP3__V_SAD_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::abs(src0[lane] - src1[lane]) + src2[lane]; - } // if - } // for - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_PK_U8_F32 class methods --- - - Inst_VOP3__V_CVT_PK_U8_F32::Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_pk_u8_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_PK_U8_F32 - - Inst_VOP3__V_CVT_PK_U8_F32::~Inst_VOP3__V_CVT_PK_U8_F32() - { - } // ~Inst_VOP3__V_CVT_PK_U8_F32 - - // --- description from .arch file --- - // D.u = ((flt32_to_uint8(S0.f) & 0xff) << (8 * S1.u[1:0])) - // | (S2.u & ~(0xff << (8 * S1.u[1:0]))). - // Convert floating point value S0 to 8-bit unsigned integer and pack the - // result into byte S1 of dword S2. 
    // v_cvt_pk_u8_f32: convert fp32 S0 to an 8-bit unsigned integer and
    // insert it into byte S1[1:0] of S2; all other bytes of S2 pass
    // through unchanged.
    void
    Inst_VOP3__V_CVT_PK_U8_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        // Only src0 is floating point, so only it can take the ABS/NEG
        // input modifiers (bit 0 of each mask).
        if (instData.ABS & 0x1) {
            src0.absModifier();
        }


        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Place the converted byte at byte position S1[1:0] and
                // merge the remaining bytes of S2 via the inverted mask.
                // NOTE(review): the (VecElemU8) cast truncates toward
                // zero and is undefined for negative or out-of-range
                // floats -- flt32_to_uint8 in the spec presumably
                // clamps; confirm intended conversion semantics.
                vdst[lane] = (((VecElemU8)src0[lane] & 0xff)
                    << (8 * bits(src1[lane], 1, 0)))
                    | (src2[lane] & ~(0xff << (8 * bits(src1[lane], 1, 0))));
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_DIV_FIXUP_F32 class methods ---

    Inst_VOP3__V_DIV_FIXUP_F32::Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_div_fixup_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_DIV_FIXUP_F32

    Inst_VOP3__V_DIV_FIXUP_F32::~Inst_VOP3__V_DIV_FIXUP_F32()
    {
    } // ~Inst_VOP3__V_DIV_FIXUP_F32

    // --- description from .arch file ---
    // D.f = Divide fixup and flags -- s0.f = Quotient, s1.f = Denominator,
    // s2.f = Numerator. This opcode generates exceptions resulting from the
    // division operation.
    // v_div_fixup_f32: post-division fixup. S0 = quotient estimate,
    // S1 = denominator, S2 = numerator. This implementation handles the
    // special cases explicitly and otherwise recomputes S2/S1 directly
    // (the quotient estimate S0 is not used on the common path).
    void
    Inst_VOP3__V_DIV_FIXUP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        // FP input modifiers, one select bit per source.
        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Zero denominator: signed infinity.
                // NOTE(review): the sign is taken from S1 alone rather
                // than S1 xor S2 as the F64 variant does -- confirm.
                if (std::fpclassify(src1[lane]) == FP_ZERO) {
                    if (std::signbit(src1[lane])) {
                        vdst[lane] = -INFINITY;
                    } else {
                        vdst[lane] = +INFINITY;
                    }
                } else if (std::isnan(src2[lane]) || std::isnan(src1[lane])) {
                    // NaN numerator or denominator propagates NaN.
                    vdst[lane] = NAN;
                } else if (std::isinf(src1[lane])) {
                    // NOTE(review): an infinite denominator yields
                    // infinity here, but finite/inf should be a signed
                    // zero (the F64 variant returns +/-0.0) -- confirm
                    // against the ISA specification.
                    if (std::signbit(src1[lane])) {
                        vdst[lane] = -INFINITY;
                    } else {
                        vdst[lane] = +INFINITY;
                    }
                } else {
                    // Common path: recompute the quotient.
                    vdst[lane] = src2[lane] / src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_DIV_FIXUP_F64 class methods ---

    Inst_VOP3__V_DIV_FIXUP_F64::Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_div_fixup_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_DIV_FIXUP_F64

    Inst_VOP3__V_DIV_FIXUP_F64::~Inst_VOP3__V_DIV_FIXUP_F64()
    {
    } // ~Inst_VOP3__V_DIV_FIXUP_F64

    // --- description from .arch file ---
    // D.d = Divide fixup and flags -- s0.d = Quotient, s1.d = Denominator,
    // s2.d = Numerator. This opcode generates exceptions resulting from the
    // division operation.
- void - Inst_VOP3__V_DIV_FIXUP_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int sign_out = std::signbit(src1[lane]) - ^ std::signbit(src2[lane]); - int exp1(0); - int exp2(0); - std::frexp(src1[lane], &exp1); - std::frexp(src2[lane], &exp2); - - if (std::isnan(src1[lane]) || std::isnan(src2[lane])) { - vdst[lane] = std::numeric_limits::quiet_NaN(); - } else if (std::fpclassify(src1[lane]) == FP_ZERO - && std::fpclassify(src2[lane]) == FP_ZERO) { - vdst[lane] - = std::numeric_limits::signaling_NaN(); - } else if (std::isinf(src1[lane]) && std::isinf(src2[lane])) { - vdst[lane] - = std::numeric_limits::signaling_NaN(); - } else if (std::fpclassify(src1[lane]) == FP_ZERO - || std::isinf(src2[lane])) { - vdst[lane] = sign_out ? -INFINITY : +INFINITY; - } else if (std::isinf(src1[lane]) - || std::fpclassify(src2[lane]) == FP_ZERO) { - vdst[lane] = sign_out ? -0.0 : +0.0; - } else if (exp2 - exp1 < -1075) { - vdst[lane] = src0[lane]; - } else if (exp1 == 2047) { - vdst[lane] = src0[lane]; - } else { - vdst[lane] = sign_out ? 
-std::fabs(src0[lane]) - : std::fabs(src0[lane]); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_DIV_SCALE_F32 class methods --- - - Inst_VOP3__V_DIV_SCALE_F32::Inst_VOP3__V_DIV_SCALE_F32( - InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_div_scale_f32") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(F32); - } // Inst_VOP3__V_DIV_SCALE_F32 - - Inst_VOP3__V_DIV_SCALE_F32::~Inst_VOP3__V_DIV_SCALE_F32() - { - } // ~Inst_VOP3__V_DIV_SCALE_F32 - - // --- description from .arch file --- - // {vcc,D.f} = Divide preop and flags -- s0.f = Quotient, s1.f = - // Denominator, s2.f = Numerator -- s0 must equal s1 or s2. Given a - // numerator and denominator, this opcode will appropriately scale inputs - // for division to avoid subnormal terms during Newton-Raphson correction - // algorithm. This opcode producses a VCC flag for post-scale of quotient. - void - Inst_VOP3__V_DIV_SCALE_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - ScalarOperandU64 vcc(gpuDynInst, instData.SDST); - VecOperandF32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane]; - vcc.setBit(lane, 0); - } - } - - vcc.write(); - vdst.write(); - } // execute - // --- Inst_VOP3__V_DIV_SCALE_F64 class methods --- - - Inst_VOP3__V_DIV_SCALE_F64::Inst_VOP3__V_DIV_SCALE_F64( - InFmt_VOP3B *iFmt) - : Inst_VOP3B(iFmt, "v_div_scale_f64") - { - setFlag(ALU); - setFlag(WritesVCC); - setFlag(F64); - } // Inst_VOP3__V_DIV_SCALE_F64 - - Inst_VOP3__V_DIV_SCALE_F64::~Inst_VOP3__V_DIV_SCALE_F64() - { - 
} // ~Inst_VOP3__V_DIV_SCALE_F64 - - // --- description from .arch file --- - // {vcc,D.d} = Divide preop and flags -- s0.d = Quotient, s1.d = - // Denominator, s2.d = Numerator -- s0 must equal s1 or s2. Given a - // numerator and denominator, this opcode will appropriately scale inputs - // for division to avoid subnormal terms during Newton-Raphson correction - // algorithm. This opcode producses a VCC flag for post-scale of quotient. - void - Inst_VOP3__V_DIV_SCALE_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); - ScalarOperandU64 vcc(gpuDynInst, instData.SDST); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - int exp1(0); - int exp2(0); - std::frexp(src1[lane], &exp1); - std::frexp(src2[lane], &exp2); - vcc.setBit(lane, 0); - - if (std::fpclassify(src1[lane]) == FP_ZERO - || std::fpclassify(src2[lane]) == FP_ZERO) { - vdst[lane] = NAN; - } else if (exp2 - exp1 >= 768) { - vcc.setBit(lane, 1); - if (src0[lane] == src1[lane]) { - vdst[lane] = std::ldexp(src0[lane], 128); - } - } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL) { - vdst[lane] = std::ldexp(src0[lane], 128); - } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL - && std::fpclassify(src2[lane] / src1[lane]) - == FP_SUBNORMAL) { - vcc.setBit(lane, 1); - if (src0[lane] == src1[lane]) { - vdst[lane] = std::ldexp(src0[lane], 128); - } - } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL) { - vdst[lane] = std::ldexp(src0[lane], -128); - } else if (std::fpclassify(src2[lane] / 
src1[lane]) - == FP_SUBNORMAL) { - vcc.setBit(lane, 1); - if (src0[lane] == src2[lane]) { - vdst[lane] = std::ldexp(src0[lane], 128); - } - } else if (exp2 <= 53) { - vdst[lane] = std::ldexp(src0[lane], 128); - } - } - } - - vcc.write(); - vdst.write(); - } // execute - // --- Inst_VOP3__V_DIV_FMAS_F32 class methods --- - - Inst_VOP3__V_DIV_FMAS_F32::Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_div_fmas_f32", false) - { - setFlag(ALU); - setFlag(ReadsVCC); - setFlag(F32); - setFlag(FMA); - } // Inst_VOP3__V_DIV_FMAS_F32 - - Inst_VOP3__V_DIV_FMAS_F32::~Inst_VOP3__V_DIV_FMAS_F32() - { - } // ~Inst_VOP3__V_DIV_FMAS_F32 - - // --- description from .arch file --- - // D.f = Special case divide FMA with scale and flags(s0.f = Quotient, - // s1.f = Denominator, s2.f = Numerator) - void - Inst_VOP3__V_DIV_FMAS_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (instData.ABS & 0x2) { - src1.absModifier(); - } - - if (instData.ABS & 0x4) { - src2.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - if (extData.NEG & 0x2) { - src1.negModifier(); - } - - if (extData.NEG & 0x4) { - src2.negModifier(); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); - } - } - - //vdst.write(); - } // execute - // --- Inst_VOP3__V_DIV_FMAS_F64 class methods --- - - Inst_VOP3__V_DIV_FMAS_F64::Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_div_fmas_f64", false) - { - setFlag(ALU); - setFlag(ReadsVCC); - setFlag(F64); - setFlag(FMA); - } // Inst_VOP3__V_DIV_FMAS_F64 - - 
    Inst_VOP3__V_DIV_FMAS_F64::~Inst_VOP3__V_DIV_FMAS_F64()
    {
    } // ~Inst_VOP3__V_DIV_FMAS_F64

    // --- description from .arch file ---
    // D.d = Special case divide FMA with scale and flags(s0.d = Quotient,
    // s1.d = Denominator, s2.d = Numerator)
    // Per-lane fused multiply-add; when the lane's VCC bit is set the
    // result is additionally post-scaled by 2^64.
    void
    Inst_VOP3__V_DIV_FMAS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();
        vcc.read();

        // FP input modifiers, one select bit per source.
        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Lane's VCC bit selects the 2^64 post-scale.
                if (bits(vcc.rawData(), lane)) {
                    vdst[lane] = std::pow(2, 64)
                        * std::fma(src0[lane], src1[lane], src2[lane]);
                } else {
                    vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MSAD_U8 class methods ---

    Inst_VOP3__V_MSAD_U8::Inst_VOP3__V_MSAD_U8(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_msad_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MSAD_U8

    Inst_VOP3__V_MSAD_U8::~Inst_VOP3__V_MSAD_U8()
    {
    } // ~Inst_VOP3__V_MSAD_U8

    // --- description from .arch file ---
    // D.u = Masked Byte SAD with accum_lo(S0.u, S1.u, S2.u).
    // Not implemented in this model; executing the instruction panics.
    void
    Inst_VOP3__V_MSAD_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_QSAD_PK_U16_U8 class methods ---

    Inst_VOP3__V_QSAD_PK_U16_U8::Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_qsad_pk_u16_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_QSAD_PK_U16_U8

    Inst_VOP3__V_QSAD_PK_U16_U8::~Inst_VOP3__V_QSAD_PK_U16_U8()
    {
    } // ~Inst_VOP3__V_QSAD_PK_U16_U8

    // --- description from .arch file ---
    // D.u = Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
    // S1.u[31:0], S2.u[63:0])
    // Not implemented in this model; executing the instruction panics.
    void
    Inst_VOP3__V_QSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_MQSAD_PK_U16_U8 class methods ---

    Inst_VOP3__V_MQSAD_PK_U16_U8::Inst_VOP3__V_MQSAD_PK_U16_U8(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_mqsad_pk_u16_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MQSAD_PK_U16_U8

    Inst_VOP3__V_MQSAD_PK_U16_U8::~Inst_VOP3__V_MQSAD_PK_U16_U8()
    {
    } // ~Inst_VOP3__V_MQSAD_PK_U16_U8

    // --- description from .arch file ---
    // D.u = Masked Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
    // --- S1.u[31:0], S2.u[63:0])
    // Not implemented in this model; executing the instruction panics.
    void
    Inst_VOP3__V_MQSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_MQSAD_U32_U8 class methods ---

    Inst_VOP3__V_MQSAD_U32_U8::Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_mqsad_u32_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MQSAD_U32_U8

    Inst_VOP3__V_MQSAD_U32_U8::~Inst_VOP3__V_MQSAD_U32_U8()
    {
    } // ~Inst_VOP3__V_MQSAD_U32_U8

    // --- description from .arch file ---
    // D.u128 = Masked Quad-Byte SAD with 32-bit accum_lo/hi(S0.u[63:0],
    // --- S1.u[31:0], S2.u[127:0])
    // Not implemented in this model; executing the instruction panics.
    void
    Inst_VOP3__V_MQSAD_U32_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_MAD_U64_U32 class methods ---

    Inst_VOP3__V_MAD_U64_U32::Inst_VOP3__V_MAD_U64_U32(
          InFmt_VOP3B *iFmt)
        : Inst_VOP3B(iFmt, "v_mad_u64_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_U64_U32

    Inst_VOP3__V_MAD_U64_U32::~Inst_VOP3__V_MAD_U64_U32()
    {
    } // ~Inst_VOP3__V_MAD_U64_U32

    // --- description from .arch file ---
    // {vcc_out,D.u64} = S0.u32 * S1.u32 + S2.u64.
    void
    Inst_VOP3__V_MAD_U64_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU64 src2(gpuDynInst, extData.SRC2);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();
        // Destination is read first so lanes with EXEC cleared keep
        // their previous values across the whole-register write below.
        vdst.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // muladd() writes the 64-bit result into vdst[lane] and
                // returns the carry-out, recorded in this lane's VCC bit.
                vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
                    src2[lane]));
            }
        }

        vcc.write();
        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MAD_I64_I32 class methods ---

    Inst_VOP3__V_MAD_I64_I32::Inst_VOP3__V_MAD_I64_I32(
          InFmt_VOP3B *iFmt)
        : Inst_VOP3B(iFmt, "v_mad_i64_i32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_I64_I32

    Inst_VOP3__V_MAD_I64_I32::~Inst_VOP3__V_MAD_I64_I32()
    {
    } // ~Inst_VOP3__V_MAD_I64_I32

    // --- description from .arch file ---
    // {vcc_out,D.i64} = S0.i32 * S1.i32 + S2.i64.
- void - Inst_VOP3__V_MAD_I64_I32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandI64 src2(gpuDynInst, extData.SRC2); - ScalarOperandU64 vcc(gpuDynInst, instData.SDST); - VecOperandI64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane], - src2[lane])); - } - } - - vcc.write(); - vdst.write(); - } // execute - // --- Inst_VOP3__V_XAD_U32 class methods --- - - Inst_VOP3__V_XAD_U32::Inst_VOP3__V_XAD_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_xad_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_XAD_U32 - - Inst_VOP3__V_XAD_U32::~Inst_VOP3__V_XAD_U32() - { - } // ~Inst_VOP3__V_XAD_U32 - - // --- description from .arch file --- - // D.u32 = (S0.u32 ^ S1.u32) + S2.u32. 
- void - Inst_VOP3__V_XAD_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (src0[lane] ^ src1[lane]) + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LSHL_ADD_U32 class methods --- - - Inst_VOP3__V_LSHL_ADD_U32::Inst_VOP3__V_LSHL_ADD_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_lshl_add_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHL_ADD_U32 - - Inst_VOP3__V_LSHL_ADD_U32::~Inst_VOP3__V_LSHL_ADD_U32() - { - } // ~Inst_VOP3__V_LSHL_ADD_U32 - - // --- description from .arch file --- - // D.u = (S0.u << S1.u[4:0]) + S2.u. 
- void - Inst_VOP3__V_LSHL_ADD_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (src0[lane] << bits(src1[lane], 4, 0)) - + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ADD_LSHL_U32 class methods --- - - Inst_VOP3__V_ADD_LSHL_U32::Inst_VOP3__V_ADD_LSHL_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_add_lshl_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ADD_LSHL_U32 - - Inst_VOP3__V_ADD_LSHL_U32::~Inst_VOP3__V_ADD_LSHL_U32() - { - } // ~Inst_VOP3__V_ADD_LSHL_U32 - - // --- description from .arch file --- - // D.u = (S0.u + S1.u) << S2.u[4:0]. 
- void - Inst_VOP3__V_ADD_LSHL_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = - (src0[lane] + src1[lane]) << bits(src2[lane], 4, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_ADD3_U32 class methods --- - - Inst_VOP3__V_ADD3_U32::Inst_VOP3__V_ADD3_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_add3_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_ADD3_U32 - - Inst_VOP3__V_ADD3_U32::~Inst_VOP3__V_ADD3_U32() - { - } // ~Inst_VOP3__V_ADD3_U32 - - // --- description from .arch file --- - // D.u = S0.u + S1.u + S2.u. 
- void - Inst_VOP3__V_ADD3_U32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] + src1[lane] + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LSHL_OR_B32 class methods --- - - Inst_VOP3__V_LSHL_OR_B32::Inst_VOP3__V_LSHL_OR_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_lshl_or_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHL_OR_B32 - - Inst_VOP3__V_LSHL_OR_B32::~Inst_VOP3__V_LSHL_OR_B32() - { - } // ~Inst_VOP3__V_LSHL_OR_B32 - - // --- description from .arch file --- - // D.u = (S0.u << S1.u[4:0]) | S2.u. 
- void - Inst_VOP3__V_LSHL_OR_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (src0[lane] << bits(src1[lane], 4, 0)) - | src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_AND_OR_B32 class methods --- - - Inst_VOP3__V_AND_OR_B32::Inst_VOP3__V_AND_OR_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_and_or_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_AND_OR_B32 - - Inst_VOP3__V_AND_OR_B32::~Inst_VOP3__V_AND_OR_B32() - { - } // ~Inst_VOP3__V_AND_OR_B32 - - // --- description from .arch file --- - // D.u = (S0.u & S1.u) | S2.u. - // Input and output modifiers not supported. 
- void - Inst_VOP3__V_AND_OR_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = (src0[lane] & src1[lane]) | src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAD_F16 class methods --- - - Inst_VOP3__V_MAD_F16::Inst_VOP3__V_MAD_F16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mad_f16", false) - { - setFlag(ALU); - setFlag(F16); - setFlag(MAD); - } // Inst_VOP3__V_MAD_F16 - - Inst_VOP3__V_MAD_F16::~Inst_VOP3__V_MAD_F16() - { - } // ~Inst_VOP3__V_MAD_F16 - - // --- description from .arch file --- - // D.f16 = S0.f16 * S1.f16 + S2.f16. - // Supports round mode, exception flags, saturation. - void - Inst_VOP3__V_MAD_F16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_MAD_U16 class methods --- - - Inst_VOP3__V_MAD_U16::Inst_VOP3__V_MAD_U16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mad_u16", false) - { - setFlag(ALU); - setFlag(MAD); - } // Inst_VOP3__V_MAD_U16 - - Inst_VOP3__V_MAD_U16::~Inst_VOP3__V_MAD_U16() - { - } // ~Inst_VOP3__V_MAD_U16 - - // --- description from .arch file --- - // D.u16 = S0.u16 * S1.u16 + S2.u16. - // Supports saturation (unsigned 16-bit integer domain). 
- void - Inst_VOP3__V_MAD_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); - ConstVecOperandU16 src2(gpuDynInst, extData.SRC2); - VecOperandU16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] * src1[lane] + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MAD_I16 class methods --- - - Inst_VOP3__V_MAD_I16::Inst_VOP3__V_MAD_I16(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mad_i16", false) - { - setFlag(ALU); - setFlag(MAD); - } // Inst_VOP3__V_MAD_I16 - - Inst_VOP3__V_MAD_I16::~Inst_VOP3__V_MAD_I16() - { - } // ~Inst_VOP3__V_MAD_I16 - - // --- description from .arch file --- - // D.i16 = S0.i16 * S1.i16 + S2.i16. - // Supports saturation (signed 16-bit integer domain). 
- void - Inst_VOP3__V_MAD_I16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); - ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); - ConstVecOperandI16 src2(gpuDynInst, extData.SRC2); - VecOperandI16 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - src2.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = src0[lane] * src1[lane] + src2[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_PERM_B32 class methods --- - - Inst_VOP3__V_PERM_B32::Inst_VOP3__V_PERM_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_perm_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_PERM_B32 - - Inst_VOP3__V_PERM_B32::~Inst_VOP3__V_PERM_B32() - { - } // ~Inst_VOP3__V_PERM_B32 - - // --- description from .arch file --- - // D.u[31:24] = permute({S0.u, S1.u}, S2.u[31:24]); - // D.u[23:16] = permute({S0.u, S1.u}, S2.u[23:16]); - // D.u[15:8] = permute({S0.u, S1.u}, S2.u[15:8]); - // D.u[7:0] = permute({S0.u, S1.u}, S2.u[7:0]); - // byte permute(byte in[8], byte sel) { - // if(sel>=13) then return 0xff; - // elsif(sel==12) then return 0x00; - // elsif(sel==11) then return in[7][7] * 0xff; - // elsif(sel==10) then return in[5][7] * 0xff; - // elsif(sel==9) then return in[3][7] * 0xff; - // elsif(sel==8) then return in[1][7] * 0xff; - // else return in[sel]; - // } - // Byte permute. 
    // Byte permute: each byte of S2 selects one byte (or a special value)
    // out of the 64-bit concatenation {S0, S1}; the four selected bytes
    // are packed into the destination. Selection is done by the
    // file-local permute() helper.
    void
    Inst_VOP3__V_PERM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Build the 64-bit byte pool {S0[31:0], S1[31:0]}, with
                // S0 in the upper half per the .arch description.
                VecElemU64 selector = (VecElemU64)src0[lane];
                selector = (selector << 32) | (VecElemU64)src1[lane];
                // Cleared before the OR-accumulation below.
                vdst[lane] = 0;

                DPRINTF(VEGA, "Executing v_perm_b32 src_0 0x%08x, src_1 "
                    "0x%08x, src_2 0x%08x, vdst 0x%08x\n", src0[lane],
                    src1[lane], src2[lane], vdst[lane]);
                DPRINTF(VEGA, "Selector: 0x%08x \n", selector);

                // One iteration per destination byte; byte i of S2 is the
                // selection code for destination byte i.
                for (int i = 0; i < 4 ; ++i) {
                    VecElemU32 permuted_val = permute(selector, 0xFF
                        & ((VecElemU32)src2[lane] >> (8 * i)));
                    vdst[lane] |= (permuted_val << (8 * i));
                }

                DPRINTF(VEGA, "v_perm result: 0x%08x\n", vdst[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_FMA_F16 class methods ---

    Inst_VOP3__V_FMA_F16::Inst_VOP3__V_FMA_F16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_fma_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(FMA);
    } // Inst_VOP3__V_FMA_F16

    Inst_VOP3__V_FMA_F16::~Inst_VOP3__V_FMA_F16()
    {
    } // ~Inst_VOP3__V_FMA_F16

    // --- description from .arch file ---
    // D.f16 = S0.f16 * S1.f16 + S2.f16.
    // Fused half precision multiply add.
    // Not implemented in the model; panics at runtime.
    void
    Inst_VOP3__V_FMA_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_DIV_FIXUP_F16 class methods ---

    Inst_VOP3__V_DIV_FIXUP_F16::Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_div_fixup_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_DIV_FIXUP_F16

    Inst_VOP3__V_DIV_FIXUP_F16::~Inst_VOP3__V_DIV_FIXUP_F16()
    {
    } // ~Inst_VOP3__V_DIV_FIXUP_F16

    // --- description from .arch file ---
    // sign_out = sign(S1.f16)^sign(S2.f16);
    // if (S2.f16 == NAN)
    //     D.f16 = Quiet(S2.f16);
    // else if (S1.f16 == NAN)
    //     D.f16 = Quiet(S1.f16);
    // else if (S1.f16 == S2.f16 == 0)
    //     # 0/0
    //     D.f16 = pele_nan(0xfe00);
    // else if (abs(S1.f16) == abs(S2.f16) == +-INF)
    //     # inf/inf
    //     D.f16 = pele_nan(0xfe00);
    // else if (S1.f16 ==0 || abs(S2.f16) == +-INF)
    //     # x/0, or inf/y
    //     D.f16 = sign_out ? -INF : INF;
    // else if (abs(S1.f16) == +-INF || S2.f16 == 0)
    //     # x/inf, 0/y
    //     D.f16 = sign_out ? -0 : 0;
    // else if ((exp(S2.f16) - exp(S1.f16)) < -150)
    //     D.f16 = sign_out ? -underflow : underflow;
    // else if (exp(S1.f16) == 255)
    //     D.f16 = sign_out ? -overflow : overflow;
    // else
    //     D.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16).
    // Half precision division fixup.
    // S0 = Quotient, S1 = Denominator, S3 = Numerator.
    // Given a numerator, denominator, and quotient from a divide, this opcode
    // will detect and apply special case numerics, touching up the quotient if
    // necessary. This opcode also generates invalid, denorm and divide by
    // zero exceptions caused by the division.
    // Not implemented in the model; panics at runtime.
    void
    Inst_VOP3__V_DIV_FIXUP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CVT_PKACCUM_U8_F32 class methods ---

    Inst_VOP3__V_CVT_PKACCUM_U8_F32::Inst_VOP3__V_CVT_PKACCUM_U8_F32(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_pkaccum_u8_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKACCUM_U8_F32

    Inst_VOP3__V_CVT_PKACCUM_U8_F32::~Inst_VOP3__V_CVT_PKACCUM_U8_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKACCUM_U8_F32

    // --- description from .arch file ---
    // byte = S1.u[1:0]; bit = byte * 8;
    // D.u[bit+7:bit] = flt32_to_uint8(S0.f);
    // Pack converted value of S0.f into byte S1 of the destination.
    // SQ translates to V_CVT_PK_U8_F32.
    // Note: this opcode uses src_c to pass destination in as a source.
    // Not implemented in the model; panics at runtime.
    void
    Inst_VOP3__V_CVT_PKACCUM_U8_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_INTERP_P1_F32 class methods ---

    Inst_VOP3__V_INTERP_P1_F32::Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_interp_p1_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_INTERP_P1_F32

    Inst_VOP3__V_INTERP_P1_F32::~Inst_VOP3__V_INTERP_P1_F32()
    {
    } // ~Inst_VOP3__V_INTERP_P1_F32

    // --- description from .arch file ---
    // D.f = P10 * S.f + P0; parameter interpolation (SQ translates to
    // V_MAD_F32 for SP).
    // CAUTION: when in HALF_LDS mode, D must not be the same GPR as S; if
    // D == S then data corruption will occur.
    // NOTE: In textual representations the I/J VGPR is the first source and
    // the attribute is the second source; however in the VOP3 encoding the
    // attribute is stored in the src0 field and the VGPR is stored in the
    // src1 field.
    // Not implemented in the model; panics at runtime.
    void
    Inst_VOP3__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_INTERP_P2_F32 class methods ---

    Inst_VOP3__V_INTERP_P2_F32::Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_interp_p2_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_INTERP_P2_F32

    Inst_VOP3__V_INTERP_P2_F32::~Inst_VOP3__V_INTERP_P2_F32()
    {
    } // ~Inst_VOP3__V_INTERP_P2_F32

    // --- description from .arch file ---
    // D.f = P20 * S.f + D.f; parameter interpolation (SQ translates to
    // V_MAD_F32 for SP).
    // NOTE: In textual representations the I/J VGPR is the first source and
    // the attribute is the second source; however in the VOP3 encoding the
    // attribute is stored in the src0 field and the VGPR is stored in the
    // src1 field.
    // Not implemented in the model; panics at runtime.
    void
    Inst_VOP3__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_INTERP_MOV_F32 class methods ---

    Inst_VOP3__V_INTERP_MOV_F32::Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_interp_mov_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_INTERP_MOV_F32

    Inst_VOP3__V_INTERP_MOV_F32::~Inst_VOP3__V_INTERP_MOV_F32()
    {
    } // ~Inst_VOP3__V_INTERP_MOV_F32

    // --- description from .arch file ---
    // D.f = {P10,P20,P0}[S.u]; parameter load.
    // Not implemented in the model; panics at runtime.
    void
    Inst_VOP3__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_INTERP_P1LL_F16 class methods ---

    Inst_VOP3__V_INTERP_P1LL_F16::Inst_VOP3__V_INTERP_P1LL_F16(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_interp_p1ll_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_INTERP_P1LL_F16

    Inst_VOP3__V_INTERP_P1LL_F16::~Inst_VOP3__V_INTERP_P1LL_F16()
    {
    } // ~Inst_VOP3__V_INTERP_P1LL_F16

    // --- description from .arch file ---
    // D.f32 = P10.f16 * S0.f32 + P0.f16.
    // 'LL' stands for 'two LDS arguments'.
    // attr_word selects the high or low half 16 bits of each LDS dword
    // accessed.
    // This opcode is available for 32-bank LDS only.
    // NOTE: In textual representations the I/J VGPR is the first source and
    // the attribute is the second source; however in the VOP3 encoding the
    // attribute is stored in the src0 field and the VGPR is stored in the
    // src1 field.
    // Not implemented in the model; panics at runtime.
    void
    Inst_VOP3__V_INTERP_P1LL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_INTERP_P1LV_F16 class methods ---

    Inst_VOP3__V_INTERP_P1LV_F16::Inst_VOP3__V_INTERP_P1LV_F16(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_interp_p1lv_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_INTERP_P1LV_F16

    Inst_VOP3__V_INTERP_P1LV_F16::~Inst_VOP3__V_INTERP_P1LV_F16()
    {
    } // ~Inst_VOP3__V_INTERP_P1LV_F16

    // --- description from .arch file ---
    // D.f32 = P10.f16 * S0.f32 + (S2.u32 >> (attr_word * 16)).f16.
    // 'LV' stands for 'One LDS and one VGPR argument'.
    // S2 holds two parameters, attr_word selects the high or low word of the
    // VGPR for this calculation, as well as the high or low half of the LDS
    // data.
    // Meant for use with 16-bank LDS.
    // NOTE: In textual representations the I/J VGPR is the first source and
    // the attribute is the second source; however in the VOP3 encoding the
    // attribute is stored in the src0 field and the VGPR is stored in the
    // src1 field.
    // Not implemented in the model; panics at runtime.
    void
    Inst_VOP3__V_INTERP_P1LV_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_INTERP_P2_F16 class methods ---

    Inst_VOP3__V_INTERP_P2_F16::Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_interp_p2_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_INTERP_P2_F16

    Inst_VOP3__V_INTERP_P2_F16::~Inst_VOP3__V_INTERP_P2_F16()
    {
    } // ~Inst_VOP3__V_INTERP_P2_F16

    // --- description from .arch file ---
    // D.f16 = P20.f16 * S0.f32 + S2.f32.
    // Final computation. attr_word selects LDS high or low 16bits. Used for
    // both 16- and 32-bank LDS.
    // Result is always written to the 16 LSBs of the destination VGPR.
    // NOTE: In textual representations the I/J VGPR is the first source and
    // the attribute is the second source; however in the VOP3 encoding the
    // attribute is stored in the src0 field and the VGPR is stored in the
    // src1 field.
    // Not implemented in the model; panics at runtime.
    void
    Inst_VOP3__V_INTERP_P2_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_ADD_F64 class methods ---

    Inst_VOP3__V_ADD_F64::Inst_VOP3__V_ADD_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_add_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_ADD_F64

    Inst_VOP3__V_ADD_F64::~Inst_VOP3__V_ADD_F64()
    {
    } // ~Inst_VOP3__V_ADD_F64

    // --- description from .arch file ---
    // D.d = S0.d + S1.d.
    // Double precision add with explicit special-case handling (NaN, inf,
    // and zero/subnormal inputs, which this model flushes to signed zero)
    // before falling through to the ordinary hardware-like add.
    void
    Inst_VOP3__V_ADD_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        // ABS/NEG bits 0 and 1 apply to src0/src1 respectively.
        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        // Bit 2 would refer to a third source, which this opcode lacks.
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane]) ) {
                    // Any NaN input produces NaN.
                    vdst[lane] = NAN;
                } else if (std::isinf(src0[lane]) &&
                           std::isinf(src1[lane])) {
                    // inf + (-inf) is NaN; same-signed infinities pass
                    // through.
                    if (std::signbit(src0[lane]) !=
                        std::signbit(src1[lane])) {
                        vdst[lane] = NAN;
                    } else {
                        vdst[lane] = src0[lane];
                    }
                } else if (std::isinf(src0[lane])) {
                    vdst[lane] = src0[lane];
                } else if (std::isinf(src1[lane])) {
                    vdst[lane] = src1[lane];
                } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) {
                    // src0 is zero/subnormal (treated as zero): result is
                    // src1, except zero+zero which keeps IEEE sign rules
                    // (-0 only when both operands are negative zero-ish).
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        if (std::signbit(src0[lane]) &&
                            std::signbit(src1[lane])) {
                            vdst[lane] = -0.0;
                        } else {
                            vdst[lane] = 0.0;
                        }
                    } else {
                        vdst[lane] = src1[lane];
                    }
                } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src1[lane]) == FP_ZERO) {
                    // Mirror image of the case above for src1.
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src0[lane]) == FP_ZERO) {
                        if (std::signbit(src0[lane]) &&
                            std::signbit(src1[lane])) {
                            vdst[lane] = -0.0;
                        } else {
                            vdst[lane] = 0.0;
                        }
                    } else {
                        vdst[lane] = src0[lane];
                    }
                } else {
                    vdst[lane] = src0[lane] + src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // ---
    // Inst_VOP3__V_MUL_F64 class methods ---

    Inst_VOP3__V_MUL_F64::Inst_VOP3__V_MUL_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_mul_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_MUL_F64

    Inst_VOP3__V_MUL_F64::~Inst_VOP3__V_MUL_F64()
    {
    } // ~Inst_VOP3__V_MUL_F64

    // --- description from .arch file ---
    // D.d = S0.d * S1.d.
    // Double precision multiply with explicit special cases: NaN inputs,
    // zero/subnormal-times-inf (NaN), and signed zero/infinity results.
    // Subnormal src0 values are treated as (signed) zero.
    void
    Inst_VOP3__V_MUL_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        // ABS/NEG bits 0 and 1 apply to src0/src1 respectively.
        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        // Bit 2 would refer to a third source, which this opcode lacks.
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    // +0 * x: NaN if x is inf, else zero with the sign
                    // of x.
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    // -0 * x: NaN if x is inf, else zero with the
                    // opposite sign of x.
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    // +inf * x: NaN if x is zero-ish, else inf with the
                    // sign of x.
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    // -inf * x: NaN if x is zero-ish, else inf with the
                    // opposite sign of x.
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MIN_F64 class methods ---

    Inst_VOP3__V_MIN_F64::Inst_VOP3__V_MIN_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_min_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_MIN_F64

    Inst_VOP3__V_MIN_F64::~Inst_VOP3__V_MIN_F64()
    {
    } // ~Inst_VOP3__V_MIN_F64

    // --- description from .arch file ---
    // D.d = min(S0.d, S1.d).
    void
    Inst_VOP3__V_MIN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // std::fmin returns the non-NaN operand when exactly one
                // input is NaN.
                vdst[lane] = std::fmin(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MAX_F64 class methods ---

    Inst_VOP3__V_MAX_F64::Inst_VOP3__V_MAX_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_max_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_MAX_F64

    Inst_VOP3__V_MAX_F64::~Inst_VOP3__V_MAX_F64()
    {
    } // ~Inst_VOP3__V_MAX_F64

    // --- description from .arch file ---
    // D.d = max(S0.d, S1.d).
    void
    Inst_VOP3__V_MAX_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // std::fmax returns the non-NaN operand when exactly one
                // input is NaN.
                vdst[lane] = std::fmax(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_LDEXP_F64 class methods ---

    Inst_VOP3__V_LDEXP_F64::Inst_VOP3__V_LDEXP_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_ldexp_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_LDEXP_F64

    Inst_VOP3__V_LDEXP_F64::~Inst_VOP3__V_LDEXP_F64()
    {
    } // ~Inst_VOP3__V_LDEXP_F64

    // --- description from .arch file ---
    // D.d = pow(S0.d, S1.i[31:0]).
- void - Inst_VOP3__V_LDEXP_F64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandF64 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - if (instData.ABS & 0x1) { - src0.absModifier(); - } - - if (extData.NEG & 0x1) { - src0.negModifier(); - } - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - if (std::isnan(src0[lane]) || std::isinf(src0[lane])) { - vdst[lane] = src0[lane]; - } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL - || std::fpclassify(src0[lane]) == FP_ZERO) { - if (std::signbit(src0[lane])) { - vdst[lane] = -0.0; - } else { - vdst[lane] = +0.0; - } - } else { - vdst[lane] = std::ldexp(src0[lane], src1[lane]); - } - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MUL_LO_U32 class methods --- - - Inst_VOP3__V_MUL_LO_U32::Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mul_lo_u32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MUL_LO_U32 - - Inst_VOP3__V_MUL_LO_U32::~Inst_VOP3__V_MUL_LO_U32() - { - } // ~Inst_VOP3__V_MUL_LO_U32 - - // --- description from .arch file --- - // D.u = S0.u * S1.u. 
    // D.u = S0.u * S1.u (low 32 bits of the product).
    void
    Inst_VOP3__V_MUL_LO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Widen to 64 bits so the multiply cannot overflow, then
                // keep only the low dword.
                VecElemI64 s0 = (VecElemI64)src0[lane];
                VecElemI64 s1 = (VecElemI64)src1[lane];
                vdst[lane] = (VecElemU32)((s0 * s1) & 0xffffffffLL);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MUL_HI_U32 class methods ---

    Inst_VOP3__V_MUL_HI_U32::Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_mul_hi_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_HI_U32

    Inst_VOP3__V_MUL_HI_U32::~Inst_VOP3__V_MUL_HI_U32()
    {
    } // ~Inst_VOP3__V_MUL_HI_U32

    // --- description from .arch file ---
    // D.u = (S0.u * S1.u) >> 32.
    // D.u = (S0.u * S1.u) >> 32 (high 32 bits of the 64-bit product).
    void
    Inst_VOP3__V_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Widen, multiply, then take the upper dword of the
                // 64-bit product.
                VecElemI64 s0 = (VecElemI64)src0[lane];
                VecElemI64 s1 = (VecElemI64)src1[lane];
                vdst[lane]
                    = (VecElemU32)(((s0 * s1) >> 32) & 0xffffffffLL);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MUL_HI_I32 class methods ---

    Inst_VOP3__V_MUL_HI_I32::Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_mul_hi_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_HI_I32

    Inst_VOP3__V_MUL_HI_I32::~Inst_VOP3__V_MUL_HI_I32()
    {
    } // ~Inst_VOP3__V_MUL_HI_I32

    // --- description from .arch file ---
    // D.i = (S0.i * S1.i) >> 32.
    // D.i = (S0.i * S1.i) >> 32 (high 32 bits of the signed product).
    void
    Inst_VOP3__V_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Sign-extend to 64 bits, multiply, keep the high dword.
                VecElemI64 s0 = (VecElemI64)src0[lane];
                VecElemI64 s1 = (VecElemI64)src1[lane];
                vdst[lane]
                    = (VecElemI32)(((s0 * s1) >> 32LL) & 0xffffffffLL);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_LDEXP_F32 class methods ---

    Inst_VOP3__V_LDEXP_F32::Inst_VOP3__V_LDEXP_F32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_ldexp_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_LDEXP_F32

    Inst_VOP3__V_LDEXP_F32::~Inst_VOP3__V_LDEXP_F32()
    {
    } // ~Inst_VOP3__V_LDEXP_F32

    // --- description from .arch file ---
    // D.f = pow(S0.f, S1.i)
    // NOTE(review): as with the f64 variant, the implementation is ldexp
    // (S0.f * 2^S1.i), not pow.
    void
    Inst_VOP3__V_LDEXP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ldexp(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_READLANE_B32 class methods ---

    Inst_VOP3__V_READLANE_B32::Inst_VOP3__V_READLANE_B32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_readlane_b32", true)
    {
        setFlag(ALU);
        setFlag(IgnoreExec);
    } // Inst_VOP3__V_READLANE_B32

    Inst_VOP3__V_READLANE_B32::~Inst_VOP3__V_READLANE_B32()
    {
    } // ~Inst_VOP3__V_READLANE_B32

    // --- description from .arch file ---
    // Copy one VGPR value to one SGPR. D = SGPR-dest, S0 = Source Data (VGPR#
    // or M0(lds-direct)), S1 = Lane Select (SGPR or M0). Ignores exec mask.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP3__V_READLANE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        // Lane select is taken modulo the 64-lane wavefront (& 0x3f);
        // the exec mask is deliberately ignored for this instruction.
        sdst = src0[src1.rawData() & 0x3f];

        sdst.write();
    } // execute
    // --- Inst_VOP3__V_WRITELANE_B32 class methods ---

    Inst_VOP3__V_WRITELANE_B32::Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_writelane_b32", false)
    {
        setFlag(ALU);
        setFlag(IgnoreExec);
    } // Inst_VOP3__V_WRITELANE_B32

    Inst_VOP3__V_WRITELANE_B32::~Inst_VOP3__V_WRITELANE_B32()
    {
    } // ~Inst_VOP3__V_WRITELANE_B32

    // --- description from .arch file ---
    // Write value into one VGPR in one lane. D = VGPR-dest, S0 = Source Data
    // (sgpr, m0, exec or constants), S1 = Lane Select (SGPR or M0). Ignores
    // exec mask.
    // Input and output modifiers not supported; this is an untyped operation.
    // SQ translates to V_MOV_B32.
    // Write a scalar value into a single lane of the destination VGPR;
    // the exec mask is ignored.
    void
    Inst_VOP3__V_WRITELANE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.read();
        src1.read();
        // Read-modify-write: preserve all lanes except the selected one.
        vdst.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        // Lane select wraps at the 64-lane wavefront boundary.
        vdst[src1.rawData() & 0x3f] = src0.rawData();

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_BCNT_U32_B32 class methods ---

    Inst_VOP3__V_BCNT_U32_B32::Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_bcnt_u32_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BCNT_U32_B32

    Inst_VOP3__V_BCNT_U32_B32::~Inst_VOP3__V_BCNT_U32_B32()
    {
    } // ~Inst_VOP3__V_BCNT_U32_B32

    // --- description from .arch file ---
    // D.u = CountOneBits(S0.u) + S1.u. Bit count.
    // D.u = CountOneBits(S0.u) + S1.u.
    void
    Inst_VOP3__V_BCNT_U32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Population count of src0 plus the src1 accumulator.
                vdst[lane] = popCount(src0[lane]) + src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MBCNT_LO_U32_B32 class methods ---

    Inst_VOP3__V_MBCNT_LO_U32_B32::Inst_VOP3__V_MBCNT_LO_U32_B32(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_mbcnt_lo_u32_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MBCNT_LO_U32_B32

    Inst_VOP3__V_MBCNT_LO_U32_B32::~Inst_VOP3__V_MBCNT_LO_U32_B32()
    {
    } // ~Inst_VOP3__V_MBCNT_LO_U32_B32

    // --- description from .arch file ---
    // ThreadMask = (1 << ThreadPosition) - 1;
    // D.u = CountOneBits(S0.u & ThreadMask[31:0]) + S1.u.
    // Masked bit count, ThreadPosition is the position of this thread in the
    // --- wavefront (in 0..63).
- void - Inst_VOP3__V_MBCNT_LO_U32_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - uint64_t threadMask = 0; - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - threadMask = ((1LL << lane) - 1LL); - vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) + - src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_MBCNT_HI_U32_B32 class methods --- - - Inst_VOP3__V_MBCNT_HI_U32_B32::Inst_VOP3__V_MBCNT_HI_U32_B32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_mbcnt_hi_u32_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_MBCNT_HI_U32_B32 - - Inst_VOP3__V_MBCNT_HI_U32_B32::~Inst_VOP3__V_MBCNT_HI_U32_B32() - { - } // ~Inst_VOP3__V_MBCNT_HI_U32_B32 - - // --- description from .arch file --- - // ThreadMask = (1 << ThreadPosition) - 1; - // D.u = CountOneBits(S0.u & ThreadMask[63:32]) + S1.u. - // Masked bit count, ThreadPosition is the position of this thread in the - // --- wavefront (in 0..63). 
- void - Inst_VOP3__V_MBCNT_HI_U32_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - uint64_t threadMask = 0; - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - threadMask = ((1LL << lane) - 1LL); - vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) + - src1[lane]; - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_LSHLREV_B64 class methods --- - - Inst_VOP3__V_LSHLREV_B64::Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_lshlrev_b64", false) - { - setFlag(ALU); - } // Inst_VOP3__V_LSHLREV_B64 - - Inst_VOP3__V_LSHLREV_B64::~Inst_VOP3__V_LSHLREV_B64() - { - } // ~Inst_VOP3__V_LSHLREV_B64 - - // --- description from .arch file --- - // D.u64 = S1.u64 << S0.u[5:0]. - // SQ translates this to an internal SP opcode. 
    // D.u64 = S1.u64 << S0.u[5:0]. Note the reversed operand order: the
    // shift amount comes from S0 and the value from S1.
    void
    Inst_VOP3__V_LSHLREV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Only the low 6 bits of the shift amount are used (0..63).
                vdst[lane] = src1[lane] << bits(src0[lane], 5, 0);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_LSHRREV_B64 class methods ---

    Inst_VOP3__V_LSHRREV_B64::Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_lshrrev_b64", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHRREV_B64

    Inst_VOP3__V_LSHRREV_B64::~Inst_VOP3__V_LSHRREV_B64()
    {
    } // ~Inst_VOP3__V_LSHRREV_B64

    // --- description from .arch file ---
    // D.u64 = S1.u64 >> S0.u[5:0].
    // The vacated bits are set to zero.
    // SQ translates this to an internal SP opcode.
    // D.u64 = S1.u64 >> S0.u[5:0]. Logical (zero-fill) right shift with
    // reversed operand order.
    void
    Inst_VOP3__V_LSHRREV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // src1 is unsigned, so >> is a logical shift; only the low
                // 6 bits of the shift amount are used.
                vdst[lane] = src1[lane] >> bits(src0[lane], 5, 0);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_ASHRREV_I64 class methods ---

    Inst_VOP3__V_ASHRREV_I64::Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_ashrrev_i64", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ASHRREV_I64

    Inst_VOP3__V_ASHRREV_I64::~Inst_VOP3__V_ASHRREV_I64()
    {
    } // ~Inst_VOP3__V_ASHRREV_I64

    // --- description from .arch file ---
    // D.u64 = signext(S1.u64) >> S0.u[5:0].
    // The vacated bits are set to the sign bit of the input value.
    // SQ translates this to an internal SP opcode.
    // D.u64 = signext(S1.u64) >> S0.u[5:0]. Arithmetic right shift with
    // reversed operand order; src1 is signed so the sign bit is replicated.
    void
    Inst_VOP3__V_ASHRREV_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Signed >> performs the arithmetic shift; only the low
                // 6 bits of the shift amount are used.
                vdst[lane]
                    = src1[lane] >> bits(src0[lane], 5, 0);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_TRIG_PREOP_F64 class methods ---

    Inst_VOP3__V_TRIG_PREOP_F64::Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_trig_preop_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_TRIG_PREOP_F64

    Inst_VOP3__V_TRIG_PREOP_F64::~Inst_VOP3__V_TRIG_PREOP_F64()
    {
    } // ~Inst_VOP3__V_TRIG_PREOP_F64

    // --- description from .arch file ---
    // D.d = Look Up 2/PI (S0.d) with segment select S1.u[4:0]. This operation
    // returns an aligned, double precision segment of 2/PI needed to do range
    // reduction on S0.d (double-precision value). Multiple segments can be
    // specified through S1.u[4:0]. Rounding is always round-to-zero. Large
    // inputs (exp > 1968) are scaled to avoid loss of precision through
    // denormalization.
- void - Inst_VOP3__V_TRIG_PREOP_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_VOP3__V_BFM_B32 class methods --- - - Inst_VOP3__V_BFM_B32::Inst_VOP3__V_BFM_B32(InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_bfm_b32", false) - { - setFlag(ALU); - } // Inst_VOP3__V_BFM_B32 - - Inst_VOP3__V_BFM_B32::~Inst_VOP3__V_BFM_B32() - { - } // ~Inst_VOP3__V_BFM_B32 - - // --- description from .arch file --- - // D.u = ((1<wavefront(); - ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); - ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); - VecOperandU32 vdst(gpuDynInst, instData.VDST); - - src0.readSrc(); - src1.readSrc(); - - /** - * input modifiers are supported by FP operations only - */ - assert(!(instData.ABS & 0x1)); - assert(!(instData.ABS & 0x2)); - assert(!(instData.ABS & 0x4)); - assert(!(extData.NEG & 0x1)); - assert(!(extData.NEG & 0x2)); - assert(!(extData.NEG & 0x4)); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (wf->execMask(lane)) { - vdst[lane] = ((1 << bits(src0[lane], 4, 0)) - 1) - << bits(src1[lane], 4, 0); - } - } - - vdst.write(); - } // execute - // --- Inst_VOP3__V_CVT_PKNORM_I16_F32 class methods --- - - Inst_VOP3__V_CVT_PKNORM_I16_F32::Inst_VOP3__V_CVT_PKNORM_I16_F32( - InFmt_VOP3A *iFmt) - : Inst_VOP3A(iFmt, "v_cvt_pknorm_i16_f32", false) - { - setFlag(ALU); - setFlag(F32); - } // Inst_VOP3__V_CVT_PKNORM_I16_F32 - - Inst_VOP3__V_CVT_PKNORM_I16_F32::~Inst_VOP3__V_CVT_PKNORM_I16_F32() - { - } // ~Inst_VOP3__V_CVT_PKNORM_I16_F32 - - // --- description from .arch file --- - // D = {(snorm)S1.f, (snorm)S0.f}. 
    // Packed-conversion opcodes below are not implemented in the simulator;
    // executing any of them is a fatal error.
    void
    Inst_VOP3__V_CVT_PKNORM_I16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CVT_PKNORM_U16_F32 class methods ---

    Inst_VOP3__V_CVT_PKNORM_U16_F32::Inst_VOP3__V_CVT_PKNORM_U16_F32(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_pknorm_u16_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKNORM_U16_F32

    Inst_VOP3__V_CVT_PKNORM_U16_F32::~Inst_VOP3__V_CVT_PKNORM_U16_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKNORM_U16_F32

    // --- description from .arch file ---
    // D = {(unorm)S1.f, (unorm)S0.f}.
    void
    Inst_VOP3__V_CVT_PKNORM_U16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CVT_PKRTZ_F16_F32 class methods ---

    Inst_VOP3__V_CVT_PKRTZ_F16_F32::Inst_VOP3__V_CVT_PKRTZ_F16_F32(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_pkrtz_f16_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKRTZ_F16_F32

    Inst_VOP3__V_CVT_PKRTZ_F16_F32::~Inst_VOP3__V_CVT_PKRTZ_F16_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKRTZ_F16_F32

    // --- description from .arch file ---
    // D = {flt32_to_flt16(S1.f),flt32_to_flt16(S0.f)}, with round-toward-zero
    // --- regardless of current round mode setting in hardware.
    // This opcode is intended for use with 16-bit compressed exports.
    // See V_CVT_F16_F32 for a version that respects the current rounding mode.
    void
    Inst_VOP3__V_CVT_PKRTZ_F16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CVT_PK_U16_U32 class methods ---

    Inst_VOP3__V_CVT_PK_U16_U32::Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_pk_u16_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CVT_PK_U16_U32

    Inst_VOP3__V_CVT_PK_U16_U32::~Inst_VOP3__V_CVT_PK_U16_U32()
    {
    } // ~Inst_VOP3__V_CVT_PK_U16_U32

    // --- description from .arch file ---
    // D = {uint32_to_uint16(S1.u), uint32_to_uint16(S0.u)}.
    void
    Inst_VOP3__V_CVT_PK_U16_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CVT_PK_I16_I32 class methods ---

    Inst_VOP3__V_CVT_PK_I16_I32::Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_pk_i16_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CVT_PK_I16_I32

    Inst_VOP3__V_CVT_PK_I16_I32::~Inst_VOP3__V_CVT_PK_I16_I32()
    {
    } // ~Inst_VOP3__V_CVT_PK_I16_I32

    // --- description from .arch file ---
    // D = {int32_to_int16(S1.i), int32_to_int16(S0.i)}.
    void
    Inst_VOP3__V_CVT_PK_I16_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_ADD_U32 class methods ---

    Inst_DS__DS_ADD_U32::Inst_DS__DS_ADD_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_u32")
    {
        setFlag(MemoryRef);
        setFlag(GroupSegment);
        setFlag(AtomicAdd);
        setFlag(AtomicNoReturn);
    } // Inst_DS__DS_ADD_U32

    Inst_DS__DS_ADD_U32::~Inst_DS__DS_ADD_U32()
    {
    } // ~Inst_DS__DS_ADD_U32

    // --- description from .arch file ---
    // 32b:
    // MEM[ADDR] += DATA;
    void
    Inst_DS__DS_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        // Nothing to do when no lanes are active; undo the issue-count
        // bump so LGKM accounting stays balanced.
        if (gpuDynInst->exec_mask.none()) {
            wf->decLGKMInstsIssued();
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        // Fixed LDS access latency of 24 cycles.
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        // Stage the per-lane atomic operands into the instruction's
        // a_data buffer for the local memory pipeline.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
    } // execute

    void
    Inst_DS__DS_ADD_U32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // The 16-bit DS offset is split across two 8-bit fields.
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initAtomicAccess<VecElemU32>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_ADD_U32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // AtomicNoReturn: nothing to write back on completion.
    } // completeAcc
    // --- Inst_DS__DS_SUB_U32 class methods ---

    Inst_DS__DS_SUB_U32::Inst_DS__DS_SUB_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_u32")
    {
    } // Inst_DS__DS_SUB_U32

    Inst_DS__DS_SUB_U32::~Inst_DS__DS_SUB_U32()
    {
    } // ~Inst_DS__DS_SUB_U32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_RSUB_U32 class methods ---

    Inst_DS__DS_RSUB_U32::Inst_DS__DS_RSUB_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_u32")
    {
    } // Inst_DS__DS_RSUB_U32

    Inst_DS__DS_RSUB_U32::~Inst_DS__DS_RSUB_U32()
    {
    } // ~Inst_DS__DS_RSUB_U32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA - MEM[ADDR];
    // RETURN_DATA = tmp.
    // Subtraction with reversed operands.
    void
    Inst_DS__DS_RSUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_INC_U32 class methods ---

    Inst_DS__DS_INC_U32::Inst_DS__DS_INC_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_u32")
    {
    } // Inst_DS__DS_INC_U32

    Inst_DS__DS_INC_U32::~Inst_DS__DS_INC_U32()
    {
    } // ~Inst_DS__DS_INC_U32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    // The DS atomic opcodes below are not implemented in the simulator;
    // executing any of them is a fatal error.
    void
    Inst_DS__DS_INC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_DEC_U32 class methods ---

    Inst_DS__DS_DEC_U32::Inst_DS__DS_DEC_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_u32")
    {
    } // Inst_DS__DS_DEC_U32

    Inst_DS__DS_DEC_U32::~Inst_DS__DS_DEC_U32()
    {
    } // ~Inst_DS__DS_DEC_U32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    void
    Inst_DS__DS_DEC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MIN_I32 class methods ---

    Inst_DS__DS_MIN_I32::Inst_DS__DS_MIN_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_i32")
    {
    } // Inst_DS__DS_MIN_I32

    Inst_DS__DS_MIN_I32::~Inst_DS__DS_MIN_I32()
    {
    } // ~Inst_DS__DS_MIN_I32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MAX_I32 class methods ---

    Inst_DS__DS_MAX_I32::Inst_DS__DS_MAX_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_i32")
    {
    } // Inst_DS__DS_MAX_I32

    Inst_DS__DS_MAX_I32::~Inst_DS__DS_MAX_I32()
    {
    } // ~Inst_DS__DS_MAX_I32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    // Unimplemented DS atomics continue here; executing any of them is a
    // fatal error.
    void
    Inst_DS__DS_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MIN_U32 class methods ---

    Inst_DS__DS_MIN_U32::Inst_DS__DS_MIN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_u32")
    {
    } // Inst_DS__DS_MIN_U32

    Inst_DS__DS_MIN_U32::~Inst_DS__DS_MIN_U32()
    {
    } // ~Inst_DS__DS_MIN_U32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MAX_U32 class methods ---

    Inst_DS__DS_MAX_U32::Inst_DS__DS_MAX_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_u32")
    {
    } // Inst_DS__DS_MAX_U32

    Inst_DS__DS_MAX_U32::~Inst_DS__DS_MAX_U32()
    {
    } // ~Inst_DS__DS_MAX_U32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_AND_B32 class methods ---

    Inst_DS__DS_AND_B32::Inst_DS__DS_AND_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_b32")
    {
    } // Inst_DS__DS_AND_B32

    Inst_DS__DS_AND_B32::~Inst_DS__DS_AND_B32()
    {
    } // ~Inst_DS__DS_AND_B32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_OR_B32 class methods ---

    Inst_DS__DS_OR_B32::Inst_DS__DS_OR_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_b32")
    {
        setFlag(MemoryRef);
        setFlag(GroupSegment);
        setFlag(AtomicOr);
        setFlag(AtomicNoReturn);
    } // Inst_DS__DS_OR_B32

    Inst_DS__DS_OR_B32::~Inst_DS__DS_OR_B32()
    {
    } // ~Inst_DS__DS_OR_B32

    // --- description from .arch file ---
    // 32b:
    // MEM[ADDR] |= DATA;
    void
    Inst_DS__DS_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        // Nothing to do when no lanes are active; undo the issue-count
        // bump so LGKM accounting stays balanced.
        if (gpuDynInst->exec_mask.none()) {
            wf->decLGKMInstsIssued();
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        // Fixed LDS access latency of 24 cycles.
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        // Stage per-lane atomic operands into a_data for the local
        // memory pipeline.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
    } // execute

    void
    Inst_DS__DS_OR_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // The 16-bit DS offset is split across two 8-bit fields.
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initAtomicAccess<VecElemU32>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_OR_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // AtomicNoReturn: nothing to write back on completion.
    } // completeAcc

    // --- Inst_DS__DS_XOR_B32 class methods ---

    Inst_DS__DS_XOR_B32::Inst_DS__DS_XOR_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_b32")
    {
    } // Inst_DS__DS_XOR_B32

    Inst_DS__DS_XOR_B32::~Inst_DS__DS_XOR_B32()
    {
    } // ~Inst_DS__DS_XOR_B32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MSKOR_B32 class methods ---

    Inst_DS__DS_MSKOR_B32::Inst_DS__DS_MSKOR_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_b32")
    {
    } // Inst_DS__DS_MSKOR_B32

    Inst_DS__DS_MSKOR_B32::~Inst_DS__DS_MSKOR_B32()
    {
    } // ~Inst_DS__DS_MSKOR_B32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    // Masked dword OR, D0 contains the mask and D1 contains the new value.
    void
    Inst_DS__DS_MSKOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_WRITE_B32 class methods ---

    Inst_DS__DS_WRITE_B32::Inst_DS__DS_WRITE_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b32")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B32

    Inst_DS__DS_WRITE_B32::~Inst_DS__DS_WRITE_B32()
    {
    } // ~Inst_DS__DS_WRITE_B32

    // --- description from .arch file ---
    // 32b:
    // MEM[ADDR] = DATA.
    // Write dword.
    // Store one dword per active lane to LDS.
    void
    Inst_DS__DS_WRITE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        // Nothing to do when no lanes are active; undo the issue-count
        // bump so LGKM accounting stays balanced.
        if (gpuDynInst->exec_mask.none()) {
            wf->decLGKMInstsIssued();
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        // Fixed LDS access latency of 24 cycles.
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        // Stage per-lane store data into d_data for the local memory
        // pipeline.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
    } // execute

    void
    Inst_DS__DS_WRITE_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // The 16-bit DS offset is split across two 8-bit fields.
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemWrite<VecElemU32>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // Stores have no destination register to write back.
    } // completeAcc
    // --- Inst_DS__DS_WRITE2_B32 class methods ---

    Inst_DS__DS_WRITE2_B32::Inst_DS__DS_WRITE2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write2_b32")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE2_B32

    Inst_DS__DS_WRITE2_B32::~Inst_DS__DS_WRITE2_B32()
    {
    } // ~Inst_DS__DS_WRITE2_B32

    // --- description from .arch file ---
    // 32b:
    // MEM[ADDR_BASE + OFFSET0 * 4] = DATA;
    // MEM[ADDR_BASE + OFFSET1 * 4] = DATA2.
    // Write 2 dwords.
    // Store two dwords per active lane at independently offset LDS
    // addresses (offsets scaled by 4 bytes).
    void
    Inst_DS__DS_WRITE2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        // Nothing to do when no lanes are active; undo the issue-count
        // bump so LGKM accounting stays balanced.
        if (gpuDynInst->exec_mask.none()) {
            wf->decLGKMInstsIssued();
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        // Fixed LDS access latency of 24 cycles.
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
        ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);

        addr.read();
        data0.read();
        data1.read();

        calcAddr(gpuDynInst, addr);

        // d_data is packed pairwise: slot 2*lane holds DATA0 and slot
        // 2*lane+1 holds DATA1 for each active lane.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
    } // execute

    void
    Inst_DS__DS_WRITE2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Offsets are in dword units; scale to bytes.
        Addr offset0 = instData.OFFSET0 * 4;
        Addr offset1 = instData.OFFSET1 * 4;

        initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
    }

    void
    Inst_DS__DS_WRITE2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // Stores have no destination register to write back.
    }
    // --- Inst_DS__DS_WRITE2ST64_B32 class methods ---

    Inst_DS__DS_WRITE2ST64_B32::Inst_DS__DS_WRITE2ST64_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write2st64_b32")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE2ST64_B32

    Inst_DS__DS_WRITE2ST64_B32::~Inst_DS__DS_WRITE2ST64_B32()
    {
    } // ~Inst_DS__DS_WRITE2ST64_B32

    // --- description from .arch file ---
    // 32b:
    // MEM[ADDR_BASE + OFFSET0 * 4 * 64] = DATA;
    // MEM[ADDR_BASE + OFFSET1 * 4 * 64] = DATA2;
    // Write 2 dwords.
- void - Inst_DS__DS_WRITE2ST64_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); - ConstVecOperandU32 data1(gpuDynInst, extData.DATA1); - - addr.read(); - data0.read(); - data1.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane * 2] - = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0 * 4 * 64; - Addr offset1 = instData.OFFSET1 * 4 * 64; - - initDualMemWrite(gpuDynInst, offset0, offset1); - } - - void - Inst_DS__DS_WRITE2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - // --- Inst_DS__DS_CMPST_B32 class methods --- - - Inst_DS__DS_CMPST_B32::Inst_DS__DS_CMPST_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_b32") - { - } // Inst_DS__DS_CMPST_B32 - - Inst_DS__DS_CMPST_B32::~Inst_DS__DS_CMPST_B32() - { - } // ~Inst_DS__DS_CMPST_B32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - // Compare and store. - // Caution, the order of src and cmp are the *opposite* of the - // --- BUFFER_ATOMIC_CMPSWAP opcode. 
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_CMPST_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_CMPST_F32 class methods ---

    Inst_DS__DS_CMPST_F32::Inst_DS__DS_CMPST_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_CMPST_F32

    Inst_DS__DS_CMPST_F32::~Inst_DS__DS_CMPST_F32()
    {
    } // ~Inst_DS__DS_CMPST_F32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Floating point compare and store that handles NaN/INF/denormal values.
    // Caution, the order of src and cmp are the *opposite* of the
    // --- BUFFER_ATOMIC_FCMPSWAP opcode.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_CMPST_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MIN_F32 class methods ---

    Inst_DS__DS_MIN_F32::Inst_DS__DS_MIN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MIN_F32

    Inst_DS__DS_MIN_F32::~Inst_DS__DS_MIN_F32()
    {
    } // ~Inst_DS__DS_MIN_F32

    // --- description from .arch file ---
    // 32b.
    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    // Floating point minimum that handles NaN/INF/denormal values.
    // Note that this opcode is slightly more general-purpose than
    // --- BUFFER_ATOMIC_FMIN.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MAX_F32 class methods ---

    Inst_DS__DS_MAX_F32::Inst_DS__DS_MAX_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MAX_F32

    Inst_DS__DS_MAX_F32::~Inst_DS__DS_MAX_F32()
    {
    } // ~Inst_DS__DS_MAX_F32

    // --- description from .arch file ---
    // 32b.
    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    // Floating point maximum that handles NaN/INF/denormal values.
    // Note that this opcode is slightly more general-purpose than
    // --- BUFFER_ATOMIC_FMAX.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_NOP class methods ---

    Inst_DS__DS_NOP::Inst_DS__DS_NOP(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_nop")
    {
        setFlag(Nop);
    } // Inst_DS__DS_NOP

    Inst_DS__DS_NOP::~Inst_DS__DS_NOP()
    {
    } // ~Inst_DS__DS_NOP

    // --- description from .arch file ---
    // Do nothing.
    void
    Inst_DS__DS_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
        // A DS nop still occupies an LGKM slot at issue; release it here
        // since no memory request will be made.
        gpuDynInst->wavefront()->decLGKMInstsIssued();
    } // execute
    // --- Inst_DS__DS_ADD_F32 class methods ---

    Inst_DS__DS_ADD_F32::Inst_DS__DS_ADD_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_f32")
    {
        setFlag(F32);
        setFlag(MemoryRef);
        setFlag(GroupSegment);
        setFlag(AtomicAdd);
        setFlag(AtomicNoReturn);
    } // Inst_DS__DS_ADD_F32

    Inst_DS__DS_ADD_F32::~Inst_DS__DS_ADD_F32()
    {
    } // ~Inst_DS__DS_ADD_F32

    // --- description from .arch file ---
    // 32b:
    // MEM[ADDR] += DATA;
    // Floating point add that handles NaN/INF/denormal values.
- void - Inst_DS__DS_ADD_F32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandF32 data(gpuDynInst, extData.DATA0); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_ADD_F32::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initAtomicAccess(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_ADD_F32::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_DS__DS_WRITE_B8 class methods --- - - Inst_DS__DS_WRITE_B8::Inst_DS__DS_WRITE_B8(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_b8") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_B8 - - Inst_DS__DS_WRITE_B8::~Inst_DS__DS_WRITE_B8() - { - } // ~Inst_DS__DS_WRITE_B8 - - // --- description from .arch file --- - // MEM[ADDR] = DATA[7:0]. - // Byte write. 
- void - Inst_DS__DS_WRITE_B8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU8 data(gpuDynInst, extData.DATA0); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE_B8::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemWrite(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_WRITE_B8::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_DS__DS_WRITE_B8_D16_HI class methods --- - - Inst_DS__DS_WRITE_B8_D16_HI::Inst_DS__DS_WRITE_B8_D16_HI(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_b8_d16_hi") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_B8_D16_HI - - Inst_DS__DS_WRITE_B8_D16_HI::~Inst_DS__DS_WRITE_B8_D16_HI() - { - } // ~Inst_DS__DS_WRITE_B8_D16_HI - - // --- description from .arch file --- - // MEM[ADDR] = DATA[23:16]. - // Byte write in to high word. 
- void - Inst_DS__DS_WRITE_B8_D16_HI::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU8 data(gpuDynInst, extData.DATA0); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = bits(data[lane], 23, 16); - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE_B8_D16_HI::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemWrite(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_WRITE_B8_D16_HI::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_DS__DS_WRITE_B16 class methods --- - - Inst_DS__DS_WRITE_B16::Inst_DS__DS_WRITE_B16(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_b16") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_B16 - - Inst_DS__DS_WRITE_B16::~Inst_DS__DS_WRITE_B16() - { - } // ~Inst_DS__DS_WRITE_B16 - - // --- description from .arch file --- - // MEM[ADDR] = DATA[15:0] - // Short write. 
- void - Inst_DS__DS_WRITE_B16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU16 data(gpuDynInst, extData.DATA0); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE_B16::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemWrite(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_WRITE_B16::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_DS__DS_ADD_RTN_U32 class methods --- - - Inst_DS__DS_ADD_RTN_U32::Inst_DS__DS_ADD_RTN_U32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_add_rtn_u32") - { - } // Inst_DS__DS_ADD_RTN_U32 - - Inst_DS__DS_ADD_RTN_U32::~Inst_DS__DS_ADD_RTN_U32() - { - } // ~Inst_DS__DS_ADD_RTN_U32 - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA; - // RETURN_DATA = tmp. 
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_ADD_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_SUB_RTN_U32 class methods ---

    Inst_DS__DS_SUB_RTN_U32::Inst_DS__DS_SUB_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_rtn_u32")
    {
    } // Inst_DS__DS_SUB_RTN_U32

    Inst_DS__DS_SUB_RTN_U32::~Inst_DS__DS_SUB_RTN_U32()
    {
    } // ~Inst_DS__DS_SUB_RTN_U32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_SUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_RSUB_RTN_U32 class methods ---

    Inst_DS__DS_RSUB_RTN_U32::Inst_DS__DS_RSUB_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_rtn_u32")
    {
    } // Inst_DS__DS_RSUB_RTN_U32

    Inst_DS__DS_RSUB_RTN_U32::~Inst_DS__DS_RSUB_RTN_U32()
    {
    } // ~Inst_DS__DS_RSUB_RTN_U32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA - MEM[ADDR];
    // RETURN_DATA = tmp.
    // Subtraction with reversed operands.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_RSUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_INC_RTN_U32 class methods ---

    Inst_DS__DS_INC_RTN_U32::Inst_DS__DS_INC_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_rtn_u32")
    {
    } // Inst_DS__DS_INC_RTN_U32

    Inst_DS__DS_INC_RTN_U32::~Inst_DS__DS_INC_RTN_U32()
    {
    } // ~Inst_DS__DS_INC_RTN_U32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_INC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_DEC_RTN_U32 class methods ---

    Inst_DS__DS_DEC_RTN_U32::Inst_DS__DS_DEC_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_rtn_u32")
    {
    } // Inst_DS__DS_DEC_RTN_U32

    Inst_DS__DS_DEC_RTN_U32::~Inst_DS__DS_DEC_RTN_U32()
    {
    } // ~Inst_DS__DS_DEC_RTN_U32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_DEC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MIN_RTN_I32 class methods ---

    Inst_DS__DS_MIN_RTN_I32::Inst_DS__DS_MIN_RTN_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_i32")
    {
    } // Inst_DS__DS_MIN_RTN_I32

    Inst_DS__DS_MIN_RTN_I32::~Inst_DS__DS_MIN_RTN_I32()
    {
    } // ~Inst_DS__DS_MIN_RTN_I32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_MIN_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MAX_RTN_I32 class methods ---

    Inst_DS__DS_MAX_RTN_I32::Inst_DS__DS_MAX_RTN_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_i32")
    {
    } // Inst_DS__DS_MAX_RTN_I32

    Inst_DS__DS_MAX_RTN_I32::~Inst_DS__DS_MAX_RTN_I32()
    {
    } // ~Inst_DS__DS_MAX_RTN_I32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_MAX_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MIN_RTN_U32 class methods ---

    Inst_DS__DS_MIN_RTN_U32::Inst_DS__DS_MIN_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_u32")
    {
    } // Inst_DS__DS_MIN_RTN_U32

    Inst_DS__DS_MIN_RTN_U32::~Inst_DS__DS_MIN_RTN_U32()
    {
    } // ~Inst_DS__DS_MIN_RTN_U32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_MIN_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MAX_RTN_U32 class methods ---

    Inst_DS__DS_MAX_RTN_U32::Inst_DS__DS_MAX_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_u32")
    {
    } // Inst_DS__DS_MAX_RTN_U32

    Inst_DS__DS_MAX_RTN_U32::~Inst_DS__DS_MAX_RTN_U32()
    {
    } // ~Inst_DS__DS_MAX_RTN_U32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_MAX_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_AND_RTN_B32 class methods ---

    Inst_DS__DS_AND_RTN_B32::Inst_DS__DS_AND_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_rtn_b32")
    {
    } // Inst_DS__DS_AND_RTN_B32

    Inst_DS__DS_AND_RTN_B32::~Inst_DS__DS_AND_RTN_B32()
    {
    } // ~Inst_DS__DS_AND_RTN_B32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_AND_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_OR_RTN_B32 class methods ---

    Inst_DS__DS_OR_RTN_B32::Inst_DS__DS_OR_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_rtn_b32")
    {
    } // Inst_DS__DS_OR_RTN_B32

    Inst_DS__DS_OR_RTN_B32::~Inst_DS__DS_OR_RTN_B32()
    {
    } // ~Inst_DS__DS_OR_RTN_B32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_OR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_XOR_RTN_B32 class methods ---

    Inst_DS__DS_XOR_RTN_B32::Inst_DS__DS_XOR_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_rtn_b32")
    {
    } // Inst_DS__DS_XOR_RTN_B32

    Inst_DS__DS_XOR_RTN_B32::~Inst_DS__DS_XOR_RTN_B32()
    {
    } // ~Inst_DS__DS_XOR_RTN_B32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_XOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MSKOR_RTN_B32 class methods ---

    Inst_DS__DS_MSKOR_RTN_B32::Inst_DS__DS_MSKOR_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_rtn_b32")
    {
    } // Inst_DS__DS_MSKOR_RTN_B32

    Inst_DS__DS_MSKOR_RTN_B32::~Inst_DS__DS_MSKOR_RTN_B32()
    {
    } // ~Inst_DS__DS_MSKOR_RTN_B32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    // Masked dword OR, D0 contains the mask and D1 contains the new value.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_MSKOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_WRXCHG_RTN_B32 class methods ---

    Inst_DS__DS_WRXCHG_RTN_B32::Inst_DS__DS_WRXCHG_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg_rtn_b32")
    {
    } // Inst_DS__DS_WRXCHG_RTN_B32

    Inst_DS__DS_WRXCHG_RTN_B32::~Inst_DS__DS_WRXCHG_RTN_B32()
    {
    } // ~Inst_DS__DS_WRXCHG_RTN_B32

    // --- description from .arch file ---
    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    // Write-exchange operation.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_WRXCHG_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_WRXCHG2_RTN_B32 class methods ---

    Inst_DS__DS_WRXCHG2_RTN_B32::Inst_DS__DS_WRXCHG2_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2_rtn_b32")
    {
    } // Inst_DS__DS_WRXCHG2_RTN_B32

    Inst_DS__DS_WRXCHG2_RTN_B32::~Inst_DS__DS_WRXCHG2_RTN_B32()
    {
    } // ~Inst_DS__DS_WRXCHG2_RTN_B32

    // --- description from .arch file ---
    // Write-exchange 2 separate dwords.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_WRXCHG2_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_WRXCHG2ST64_RTN_B32 class methods ---

    Inst_DS__DS_WRXCHG2ST64_RTN_B32::Inst_DS__DS_WRXCHG2ST64_RTN_B32(
          InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b32")
    {
    } // Inst_DS__DS_WRXCHG2ST64_RTN_B32

    Inst_DS__DS_WRXCHG2ST64_RTN_B32::~Inst_DS__DS_WRXCHG2ST64_RTN_B32()
    {
    } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B32

    // --- description from .arch file ---
    // Write-exchange 2 separate dwords with a stride of 64 dwords.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_WRXCHG2ST64_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_CMPST_RTN_B32 class methods ---

    Inst_DS__DS_CMPST_RTN_B32::Inst_DS__DS_CMPST_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_b32")
    {
    } // Inst_DS__DS_CMPST_RTN_B32

    Inst_DS__DS_CMPST_RTN_B32::~Inst_DS__DS_CMPST_RTN_B32()
    {
    } // ~Inst_DS__DS_CMPST_RTN_B32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Compare and store.
    // Caution, the order of src and cmp are the *opposite* of the
    // --- BUFFER_ATOMIC_CMPSWAP opcode.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_CMPST_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_CMPST_RTN_F32 class methods ---

    Inst_DS__DS_CMPST_RTN_F32::Inst_DS__DS_CMPST_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_CMPST_RTN_F32

    Inst_DS__DS_CMPST_RTN_F32::~Inst_DS__DS_CMPST_RTN_F32()
    {
    } // ~Inst_DS__DS_CMPST_RTN_F32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Floating point compare and store that handles NaN/INF/denormal values.
    // Caution, the order of src and cmp are the *opposite* of the
    // --- BUFFER_ATOMIC_FCMPSWAP opcode.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_CMPST_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MIN_RTN_F32 class methods ---

    Inst_DS__DS_MIN_RTN_F32::Inst_DS__DS_MIN_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MIN_RTN_F32

    Inst_DS__DS_MIN_RTN_F32::~Inst_DS__DS_MIN_RTN_F32()
    {
    } // ~Inst_DS__DS_MIN_RTN_F32

    // --- description from .arch file ---
    // 32b.
    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    // Floating point minimum that handles NaN/INF/denormal values.
    // Note that this opcode is slightly more general-purpose than
    // --- BUFFER_ATOMIC_FMIN.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_MIN_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MAX_RTN_F32 class methods ---

    Inst_DS__DS_MAX_RTN_F32::Inst_DS__DS_MAX_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MAX_RTN_F32

    Inst_DS__DS_MAX_RTN_F32::~Inst_DS__DS_MAX_RTN_F32()
    {
    } // ~Inst_DS__DS_MAX_RTN_F32

    // --- description from .arch file ---
    // 32b.
    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    // Floating point maximum that handles NaN/INF/denormal values.
    // Note that this opcode is slightly more general-purpose than
    // --- BUFFER_ATOMIC_FMAX.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_MAX_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_WRAP_RTN_B32 class methods ---

    Inst_DS__DS_WRAP_RTN_B32::Inst_DS__DS_WRAP_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrap_rtn_b32")
    {
    } // Inst_DS__DS_WRAP_RTN_B32

    Inst_DS__DS_WRAP_RTN_B32::~Inst_DS__DS_WRAP_RTN_B32()
    {
    } // ~Inst_DS__DS_WRAP_RTN_B32

    // --- description from .arch file ---
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? tmp - DATA : tmp + DATA2;
    // RETURN_DATA = tmp.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_WRAP_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_ADD_RTN_F32 class methods ---

    Inst_DS__DS_ADD_RTN_F32::Inst_DS__DS_ADD_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_ADD_RTN_F32

    Inst_DS__DS_ADD_RTN_F32::~Inst_DS__DS_ADD_RTN_F32()
    {
    } // ~Inst_DS__DS_ADD_RTN_F32

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    // Floating point add that handles NaN/INF/denormal values.
    // Not implemented in the model; executing this opcode panics.
    void
    Inst_DS__DS_ADD_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_READ_B32 class methods ---

    Inst_DS__DS_READ_B32::Inst_DS__DS_READ_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_b32")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_B32

    Inst_DS__DS_READ_B32::~Inst_DS__DS_READ_B32()
    {
    } // ~Inst_DS__DS_READ_B32

    // --- description from .arch file ---
    // RETURN_DATA = MEM[ADDR].
    // Dword read.
- void - Inst_DS__DS_READ_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ_B32::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_B32::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } // completeAcc - // --- Inst_DS__DS_READ2_B32 class methods --- - - Inst_DS__DS_READ2_B32::Inst_DS__DS_READ2_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read2_b32") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ2_B32 - - Inst_DS__DS_READ2_B32::~Inst_DS__DS_READ2_B32() - { - } // ~Inst_DS__DS_READ2_B32 - - // --- description from .arch file --- - // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4]; - // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4]. - // Read 2 dwords. 
- void - Inst_DS__DS_READ2_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ2_B32::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0 * 4; - Addr offset1 = instData.OFFSET1 * 4; - - initDualMemRead(gpuDynInst, offset0, offset1); - } // initiateAcc - - void - Inst_DS__DS_READ2_B32::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]; - } - } - - vdst0.write(); - vdst1.write(); - } // completeAcc - // --- Inst_DS__DS_READ2ST64_B32 class methods --- - - Inst_DS__DS_READ2ST64_B32::Inst_DS__DS_READ2ST64_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read2st64_b32") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ2ST64_B32 - - Inst_DS__DS_READ2ST64_B32::~Inst_DS__DS_READ2ST64_B32() - { - } // ~Inst_DS__DS_READ2ST64_B32 - - // --- description from .arch file --- - // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4 * 64]; - // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4 * 64]. - // Read 2 dwords. 
- void - Inst_DS__DS_READ2ST64_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = (instData.OFFSET0 * 4 * 64); - Addr offset1 = (instData.OFFSET1 * 4 * 64); - - initDualMemRead(gpuDynInst, offset0, offset1); - } - - void - Inst_DS__DS_READ2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]; - } - } - - vdst0.write(); - vdst1.write(); - } - // --- Inst_DS__DS_READ_I8 class methods --- - - Inst_DS__DS_READ_I8::Inst_DS__DS_READ_I8(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_i8") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_I8 - - Inst_DS__DS_READ_I8::~Inst_DS__DS_READ_I8() - { - } // ~Inst_DS__DS_READ_I8 - - // --- description from .arch file --- - // RETURN_DATA = signext(MEM[ADDR][7:0]). - // Signed byte read. 
- void - Inst_DS__DS_READ_I8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ_I8::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_I8::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (VecElemU32)sext<8>((reinterpret_cast( - gpuDynInst->d_data))[lane]); - } - } - - vdst.write(); - } // completeAcc - // --- Inst_DS__DS_READ_U8 class methods --- - - Inst_DS__DS_READ_U8::Inst_DS__DS_READ_U8(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_u8") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_U8 - - Inst_DS__DS_READ_U8::~Inst_DS__DS_READ_U8() - { - } // ~Inst_DS__DS_READ_U8 - - // --- description from .arch file --- - // RETURN_DATA = {24'h0,MEM[ADDR][7:0]}. - // Unsigned byte read. 
- void - Inst_DS__DS_READ_U8::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ_U8::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_U8::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (VecElemU32)(reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } // completeAcc - // --- Inst_DS__DS_READ_I16 class methods --- - - Inst_DS__DS_READ_I16::Inst_DS__DS_READ_I16(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_i16") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_I16 - - Inst_DS__DS_READ_I16::~Inst_DS__DS_READ_I16() - { - } // ~Inst_DS__DS_READ_I16 - - // --- description from .arch file --- - // RETURN_DATA = signext(MEM[ADDR][15:0]). - // Signed short read. 
- void - Inst_DS__DS_READ_I16::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_READ_U16 class methods --- - - Inst_DS__DS_READ_U16::Inst_DS__DS_READ_U16(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_u16") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_U16 - - Inst_DS__DS_READ_U16::~Inst_DS__DS_READ_U16() - { - } // ~Inst_DS__DS_READ_U16 - - // --- description from .arch file --- - // RETURN_DATA = {16'h0,MEM[ADDR][15:0]}. - // Unsigned short read. - void - Inst_DS__DS_READ_U16::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - void - Inst_DS__DS_READ_U16::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_U16::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (VecElemU32)(reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } // completeAcc - // --- Inst_DS__DS_SWIZZLE_B32 class methods --- - - Inst_DS__DS_SWIZZLE_B32::Inst_DS__DS_SWIZZLE_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_swizzle_b32") - { - /** - * While this operation doesn't actually use DS storage we classify - * it as a load here because it does a writeback to a VGPR, which - * fits in better with the 
LDS pipeline logic. - */ - setFlag(Load); - } // Inst_DS__DS_SWIZZLE_B32 - - Inst_DS__DS_SWIZZLE_B32::~Inst_DS__DS_SWIZZLE_B32() - { - } // ~Inst_DS__DS_SWIZZLE_B32 - - // --- description from .arch file --- - // RETURN_DATA = swizzle(vgpr_data, offset1:offset0). - // Dword swizzle, no data is written to LDS memory; See ds_opcodes.docx for - // --- details. - void - Inst_DS__DS_SWIZZLE_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - wf->decLGKMInstsIssued(); - - if (gpuDynInst->exec_mask.none()) { - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit() - ->cyclesToTicks(Cycles(24))); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA0); - VecOperandU32 vdst(gpuDynInst, extData.VDST); - /** - * The "DS pattern" is comprised of both offset fields. That is, the - * swizzle pattern between lanes. Bit 15 of the DS pattern dictates - * which swizzle mode to use. There are two different swizzle - * patterns: 1) QDMode and 2) Bit-masks mode. If bit 15 is set use - * QDMode else use Bit-masks mode. The remaining bits dictate how to - * swizzle the lanes. - * - * QDMode: Chunks the lanes into 4s and swizzles among them. - * Bits 7:6 dictate where lane 3 (of the current chunk) - * gets its date, 5:4 lane 2, etc. - * - * Bit-mask: This mode breaks bits 14:0 into 3 equal-sized chunks. - * 14:10 is the xor_mask, 9:5 is the or_mask, and 4:0 - * is the and_mask. Each lane is swizzled by performing - * the appropriate operation using these masks. - */ - VecElemU16 ds_pattern = ((instData.OFFSET1 << 8) | instData.OFFSET0); - - data.read(); - - if (bits(ds_pattern, 15)) { - // QDMode - for (int lane = 0; lane < NumVecElemPerVecReg; lane += 4) { - /** - * This operation allows data sharing between groups - * of four consecutive threads. Note the increment by - * 4 in the for loop. 
- */ - if (gpuDynInst->exec_mask[lane]) { - int index0 = lane + bits(ds_pattern, 1, 0); - panic_if(index0 >= NumVecElemPerVecReg, "%s: index0 (%d) " - "is out of bounds.\n", gpuDynInst->disassemble(), - index0); - vdst[lane] - = gpuDynInst->exec_mask[index0] ? data[index0]: 0; - } - if (gpuDynInst->exec_mask[lane + 1]) { - int index1 = lane + bits(ds_pattern, 3, 2); - panic_if(index1 >= NumVecElemPerVecReg, "%s: index1 (%d) " - "is out of bounds.\n", gpuDynInst->disassemble(), - index1); - vdst[lane + 1] - = gpuDynInst->exec_mask[index1] ? data[index1]: 0; - } - if (gpuDynInst->exec_mask[lane + 2]) { - int index2 = lane + bits(ds_pattern, 5, 4); - panic_if(index2 >= NumVecElemPerVecReg, "%s: index2 (%d) " - "is out of bounds.\n", gpuDynInst->disassemble(), - index2); - vdst[lane + 2] - = gpuDynInst->exec_mask[index2] ? data[index2]: 0; - } - if (gpuDynInst->exec_mask[lane + 3]) { - int index3 = lane + bits(ds_pattern, 7, 6); - panic_if(index3 >= NumVecElemPerVecReg, "%s: index3 (%d) " - "is out of bounds.\n", gpuDynInst->disassemble(), - index3); - vdst[lane + 3] - = gpuDynInst->exec_mask[index3] ? data[index3]: 0; - } - } - } else { - // Bit Mode - int and_mask = bits(ds_pattern, 4, 0); - int or_mask = bits(ds_pattern, 9, 5); - int xor_mask = bits(ds_pattern, 14, 10); - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - int index = (((lane & and_mask) | or_mask) ^ xor_mask); - // Adjust for the next 32 lanes. - if (lane > 31) { - index += 32; - } - panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is " - "out of bounds.\n", gpuDynInst->disassemble(), - index); - vdst[lane] - = gpuDynInst->exec_mask[index] ? data[index] : 0; - } - } - } - - vdst.write(); - - /** - * This is needed because we treat this instruction as a load - * but it's not an actual memory request. 
- * Without this, the destination register never gets marked as - * free, leading to a possible deadlock - */ - wf->computeUnit->vrf[wf->simdId]-> - scheduleWriteOperandsFromLoad(wf, gpuDynInst); - /** - * Similarly, this counter could build up over time, even across - * multiple wavefronts, and cause a deadlock. - */ - wf->rdLmReqsInPipe--; - } // execute - // --- Inst_DS__DS_PERMUTE_B32 class methods --- - - Inst_DS__DS_PERMUTE_B32::Inst_DS__DS_PERMUTE_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_permute_b32") - { - setFlag(MemoryRef); - /** - * While this operation doesn't actually use DS storage we classify - * it as a load here because it does a writeback to a VGPR, which - * fits in better with the LDS pipeline logic. - */ - setFlag(Load); - } // Inst_DS__DS_PERMUTE_B32 - - Inst_DS__DS_PERMUTE_B32::~Inst_DS__DS_PERMUTE_B32() - { - } // ~Inst_DS__DS_PERMUTE_B32 - - // --- description from .arch file --- - // Forward permute. - void - Inst_DS__DS_PERMUTE_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - wf->decLGKMInstsIssued(); - - if (gpuDynInst->exec_mask.none()) { - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit() - ->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data(gpuDynInst, extData.DATA0); - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - addr.read(); - data.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - /** - * One of the offset fields can be used for the index. - * It is assumed OFFSET0 would be used, as OFFSET1 is - * typically only used for DS ops that operate on two - * disparate pieces of data. - */ - assert(!instData.OFFSET1); - /** - * The address provided is a byte address, but VGPRs are - * 4 bytes, so we must divide by 4 to get the actual VGPR - * index. 
Additionally, the index is calculated modulo the - * WF size, 64 in this case, so we simply extract bits 7-2. - */ - int index = bits(addr[lane] + instData.OFFSET0, 7, 2); - panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out " - "of bounds.\n", gpuDynInst->disassemble(), index); - /** - * If the shuffled index corresponds to a lane that is - * inactive then this instruction writes a 0 to the active - * lane in VDST. - */ - if (wf->execMask(index)) { - vdst[index] = data[lane]; - } else { - vdst[index] = 0; - } - } - } - - vdst.write(); - - /** - * This is needed because we treat this instruction as a load - * but it's not an actual memory request. - * Without this, the destination register never gets marked as - * free, leading to a possible deadlock - */ - wf->computeUnit->vrf[wf->simdId]-> - scheduleWriteOperandsFromLoad(wf, gpuDynInst); - /** - * Similarly, this counter could build up over time, even across - * multiple wavefronts, and cause a deadlock. - */ - wf->rdLmReqsInPipe--; - } // execute - // --- Inst_DS__DS_BPERMUTE_B32 class methods --- - - Inst_DS__DS_BPERMUTE_B32::Inst_DS__DS_BPERMUTE_B32(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_bpermute_b32") - { - setFlag(MemoryRef); - /** - * While this operation doesn't actually use DS storage we classify - * it as a load here because it does a writeback to a VGPR, which - * fits in better with the LDS pipeline logic. - */ - setFlag(Load); - } // Inst_DS__DS_BPERMUTE_B32 - - Inst_DS__DS_BPERMUTE_B32::~Inst_DS__DS_BPERMUTE_B32() - { - } // ~Inst_DS__DS_BPERMUTE_B32 - - // --- description from .arch file --- - // Backward permute. 
- void - Inst_DS__DS_BPERMUTE_B32::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - wf->decLGKMInstsIssued(); - - if (gpuDynInst->exec_mask.none()) { - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit() - ->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data(gpuDynInst, extData.DATA0); - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - addr.read(); - data.read(); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - /** - * One of the offset fields can be used for the index. - * It is assumed OFFSET0 would be used, as OFFSET1 is - * typically only used for DS ops that operate on two - * disparate pieces of data. - */ - assert(!instData.OFFSET1); - /** - * The address provided is a byte address, but VGPRs are - * 4 bytes, so we must divide by 4 to get the actual VGPR - * index. Additionally, the index is calculated modulo the - * WF size, 64 in this case, so we simply extract bits 7-2. - */ - int index = bits(addr[lane] + instData.OFFSET0, 7, 2); - panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out " - "of bounds.\n", gpuDynInst->disassemble(), index); - /** - * If the shuffled index corresponds to a lane that is - * inactive then this instruction writes a 0 to the active - * lane in VDST. - */ - if (wf->execMask(index)) { - vdst[lane] = data[index]; - } else { - vdst[lane] = 0; - } - } - } - - vdst.write(); - - /** - * This is needed because we treat this instruction as a load - * but it's not an actual memory request. 
- * Without this, the destination register never gets marked as - * free, leading to a possible deadlock - */ - wf->computeUnit->vrf[wf->simdId]-> - scheduleWriteOperandsFromLoad(wf, gpuDynInst); - /** - * Similarly, this counter could build up over time, even across - * multiple wavefronts, and cause a deadlock. - */ - wf->rdLmReqsInPipe--; - } // execute - - // --- Inst_DS__DS_ADD_U64 class methods --- - - Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_add_u64") - { - setFlag(MemoryRef); - setFlag(GroupSegment); - setFlag(AtomicAdd); - setFlag(AtomicNoReturn); - } // Inst_DS__DS_ADD_U64 - - Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64() - { - } // ~Inst_DS__DS_ADD_U64 - - // --- description from .arch file --- - // 64b: - // MEM[ADDR] += DATA[0:1]; - void - Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data(gpuDynInst, extData.DATA0); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_ADD_U64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initAtomicAccess(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_ADD_U64::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_DS__DS_SUB_U64 class methods --- - - 
Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_sub_u64") - { - } // Inst_DS__DS_SUB_U64 - - Inst_DS__DS_SUB_U64::~Inst_DS__DS_SUB_U64() - { - } // ~Inst_DS__DS_SUB_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_SUB_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_RSUB_U64 class methods --- - - Inst_DS__DS_RSUB_U64::Inst_DS__DS_RSUB_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_rsub_u64") - { - } // Inst_DS__DS_RSUB_U64 - - Inst_DS__DS_RSUB_U64::~Inst_DS__DS_RSUB_U64() - { - } // ~Inst_DS__DS_RSUB_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA - MEM[ADDR]; - // RETURN_DATA = tmp. - // Subtraction with reversed operands. - void - Inst_DS__DS_RSUB_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_INC_U64 class methods --- - - Inst_DS__DS_INC_U64::Inst_DS__DS_INC_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_inc_u64") - { - } // Inst_DS__DS_INC_U64 - - Inst_DS__DS_INC_U64::~Inst_DS__DS_INC_U64() - { - } // ~Inst_DS__DS_INC_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_INC_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_DEC_U64 class methods --- - - Inst_DS__DS_DEC_U64::Inst_DS__DS_DEC_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_dec_u64") - { - } // Inst_DS__DS_DEC_U64 - - Inst_DS__DS_DEC_U64::~Inst_DS__DS_DEC_U64() - { - } // ~Inst_DS__DS_DEC_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 - // (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_DS__DS_DEC_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_I64 class methods --- - - Inst_DS__DS_MIN_I64::Inst_DS__DS_MIN_I64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_i64") - { - } // Inst_DS__DS_MIN_I64 - - Inst_DS__DS_MIN_I64::~Inst_DS__DS_MIN_I64() - { - } // ~Inst_DS__DS_MIN_I64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MIN_I64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_I64 class methods --- - - Inst_DS__DS_MAX_I64::Inst_DS__DS_MAX_I64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_i64") - { - } // Inst_DS__DS_MAX_I64 - - Inst_DS__DS_MAX_I64::~Inst_DS__DS_MAX_I64() - { - } // ~Inst_DS__DS_MAX_I64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MAX_I64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_U64 class methods --- - - Inst_DS__DS_MIN_U64::Inst_DS__DS_MIN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_u64") - { - } // Inst_DS__DS_MIN_U64 - - Inst_DS__DS_MIN_U64::~Inst_DS__DS_MIN_U64() - { - } // ~Inst_DS__DS_MIN_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_DS__DS_MIN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_U64 class methods --- - - Inst_DS__DS_MAX_U64::Inst_DS__DS_MAX_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_u64") - { - } // Inst_DS__DS_MAX_U64 - - Inst_DS__DS_MAX_U64::~Inst_DS__DS_MAX_U64() - { - } // ~Inst_DS__DS_MAX_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_MAX_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_AND_B64 class methods --- - - Inst_DS__DS_AND_B64::Inst_DS__DS_AND_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_and_b64") - { - } // Inst_DS__DS_AND_B64 - - Inst_DS__DS_AND_B64::~Inst_DS__DS_AND_B64() - { - } // ~Inst_DS__DS_AND_B64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_AND_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_OR_B64 class methods --- - - Inst_DS__DS_OR_B64::Inst_DS__DS_OR_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_or_b64") - { - } // Inst_DS__DS_OR_B64 - - Inst_DS__DS_OR_B64::~Inst_DS__DS_OR_B64() - { - } // ~Inst_DS__DS_OR_B64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_OR_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_XOR_B64 class methods --- - - Inst_DS__DS_XOR_B64::Inst_DS__DS_XOR_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_xor_b64") - { - } // Inst_DS__DS_XOR_B64 - - Inst_DS__DS_XOR_B64::~Inst_DS__DS_XOR_B64() - { - } // ~Inst_DS__DS_XOR_B64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_DS__DS_XOR_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MSKOR_B64 class methods --- - - Inst_DS__DS_MSKOR_B64::Inst_DS__DS_MSKOR_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_mskor_b64") - { - } // Inst_DS__DS_MSKOR_B64 - - Inst_DS__DS_MSKOR_B64::~Inst_DS__DS_MSKOR_B64() - { - } // ~Inst_DS__DS_MSKOR_B64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2; - // RETURN_DATA = tmp. - // Masked dword OR, D0 contains the mask and D1 contains the new value. - void - Inst_DS__DS_MSKOR_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_WRITE_B64 class methods --- - - Inst_DS__DS_WRITE_B64::Inst_DS__DS_WRITE_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_b64") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_B64 - - Inst_DS__DS_WRITE_B64::~Inst_DS__DS_WRITE_B64() - { - } // ~Inst_DS__DS_WRITE_B64 - - // --- description from .arch file --- - // 64b: - // MEM[ADDR] = DATA. - // Write qword. 
- void - Inst_DS__DS_WRITE_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data(gpuDynInst, extData.DATA0); - - addr.read(); - data.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemWrite(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_WRITE_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_DS__DS_WRITE2_B64 class methods --- - - Inst_DS__DS_WRITE2_B64::Inst_DS__DS_WRITE2_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write2_b64") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE2_B64 - - Inst_DS__DS_WRITE2_B64::~Inst_DS__DS_WRITE2_B64() - { - } // ~Inst_DS__DS_WRITE2_B64 - - // --- description from .arch file --- - // 64b: - // MEM[ADDR_BASE + OFFSET0 * 8] = DATA; - // MEM[ADDR_BASE + OFFSET1 * 8] = DATA2. - // Write 2 qwords. 
- void - Inst_DS__DS_WRITE2_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data0(gpuDynInst, extData.DATA0); - ConstVecOperandU64 data1(gpuDynInst, extData.DATA1); - - addr.read(); - data0.read(); - data1.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 2] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE2_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0 * 8; - Addr offset1 = instData.OFFSET1 * 8; - - initDualMemWrite(gpuDynInst, offset0, offset1); - } - - void - Inst_DS__DS_WRITE2_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - // --- Inst_DS__DS_WRITE2ST64_B64 class methods --- - - Inst_DS__DS_WRITE2ST64_B64::Inst_DS__DS_WRITE2ST64_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write2st64_b64") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE2ST64_B64 - - Inst_DS__DS_WRITE2ST64_B64::~Inst_DS__DS_WRITE2ST64_B64() - { - } // ~Inst_DS__DS_WRITE2ST64_B64 - - // --- description from .arch file --- - // 64b: - // MEM[ADDR_BASE + OFFSET0 * 8 * 64] = DATA; - // MEM[ADDR_BASE + OFFSET1 * 8 * 64] = DATA2; - // Write 2 qwords. 
- void - Inst_DS__DS_WRITE2ST64_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU64 data0(gpuDynInst, extData.DATA0); - ConstVecOperandU64 data1(gpuDynInst, extData.DATA1); - - addr.read(); - data0.read(); - data1.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 2] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1] = data1[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0 * 8 * 64; - Addr offset1 = instData.OFFSET1 * 8 * 64; - - initDualMemWrite(gpuDynInst, offset0, offset1); - } - - void - Inst_DS__DS_WRITE2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - } - // --- Inst_DS__DS_CMPST_B64 class methods --- - - Inst_DS__DS_CMPST_B64::Inst_DS__DS_CMPST_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_b64") - { - } // Inst_DS__DS_CMPST_B64 - - Inst_DS__DS_CMPST_B64::~Inst_DS__DS_CMPST_B64() - { - } // ~Inst_DS__DS_CMPST_B64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - // Compare and store. - // Caution, the order of src and cmp are the *opposite* of the - // --- BUFFER_ATOMIC_CMPSWAP_X2 opcode. 
- void - Inst_DS__DS_CMPST_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_CMPST_F64 class methods --- - - Inst_DS__DS_CMPST_F64::Inst_DS__DS_CMPST_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_cmpst_f64") - { - setFlag(F64); - } // Inst_DS__DS_CMPST_F64 - - Inst_DS__DS_CMPST_F64::~Inst_DS__DS_CMPST_F64() - { - } // ~Inst_DS__DS_CMPST_F64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // src = DATA2; - // cmp = DATA; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. - // Floating point compare and store that handles NaN/INF/denormal values. - // Caution, the order of src and cmp are the *opposite* of the - // --- BUFFER_ATOMIC_FCMPSWAP_X2 opcode. - void - Inst_DS__DS_CMPST_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_F64 class methods --- - - Inst_DS__DS_MIN_F64::Inst_DS__DS_MIN_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_f64") - { - setFlag(F64); - } // Inst_DS__DS_MIN_F64 - - Inst_DS__DS_MIN_F64::~Inst_DS__DS_MIN_F64() - { - } // ~Inst_DS__DS_MIN_F64 - - // --- description from .arch file --- - // 64b. - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (cmp < tmp) ? src : tmp. - // Floating point minimum that handles NaN/INF/denormal values. - // Note that this opcode is slightly more general-purpose than - // --- BUFFER_ATOMIC_FMIN_X2. - void - Inst_DS__DS_MIN_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_F64 class methods --- - - Inst_DS__DS_MAX_F64::Inst_DS__DS_MAX_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_f64") - { - setFlag(F64); - } // Inst_DS__DS_MAX_F64 - - Inst_DS__DS_MAX_F64::~Inst_DS__DS_MAX_F64() - { - } // ~Inst_DS__DS_MAX_F64 - - // --- description from .arch file --- - // 64b. - // tmp = MEM[ADDR]; - // src = DATA; - // cmp = DATA2; - // MEM[ADDR] = (tmp > cmp) ? src : tmp. 
- // Floating point maximum that handles NaN/INF/denormal values. - // Note that this opcode is slightly more general-purpose than - // --- BUFFER_ATOMIC_FMAX_X2. - void - Inst_DS__DS_MAX_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_ADD_RTN_U64 class methods --- - - Inst_DS__DS_ADD_RTN_U64::Inst_DS__DS_ADD_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_add_rtn_u64") - { - } // Inst_DS__DS_ADD_RTN_U64 - - Inst_DS__DS_ADD_RTN_U64::~Inst_DS__DS_ADD_RTN_U64() - { - } // ~Inst_DS__DS_ADD_RTN_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_ADD_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_SUB_RTN_U64 class methods --- - - Inst_DS__DS_SUB_RTN_U64::Inst_DS__DS_SUB_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_sub_rtn_u64") - { - } // Inst_DS__DS_SUB_RTN_U64 - - Inst_DS__DS_SUB_RTN_U64::~Inst_DS__DS_SUB_RTN_U64() - { - } // ~Inst_DS__DS_SUB_RTN_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_DS__DS_SUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_RSUB_RTN_U64 class methods --- - - Inst_DS__DS_RSUB_RTN_U64::Inst_DS__DS_RSUB_RTN_U64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_rsub_rtn_u64") - { - } // Inst_DS__DS_RSUB_RTN_U64 - - Inst_DS__DS_RSUB_RTN_U64::~Inst_DS__DS_RSUB_RTN_U64() - { - } // ~Inst_DS__DS_RSUB_RTN_U64 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA - MEM[ADDR]; - // RETURN_DATA = tmp. - // Subtraction with reversed operands. 
    void
    Inst_DS__DS_RSUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_INC_RTN_U64 class methods ---

    Inst_DS__DS_INC_RTN_U64::Inst_DS__DS_INC_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_rtn_u64")
    {
    } // Inst_DS__DS_INC_RTN_U64

    Inst_DS__DS_INC_RTN_U64::~Inst_DS__DS_INC_RTN_U64()
    {
    } // ~Inst_DS__DS_INC_RTN_U64

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_INC_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_DEC_RTN_U64 class methods ---

    Inst_DS__DS_DEC_RTN_U64::Inst_DS__DS_DEC_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_rtn_u64")
    {
    } // Inst_DS__DS_DEC_RTN_U64

    Inst_DS__DS_DEC_RTN_U64::~Inst_DS__DS_DEC_RTN_U64()
    {
    } // ~Inst_DS__DS_DEC_RTN_U64

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
    // (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_DEC_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MIN_RTN_I64 class methods ---

    Inst_DS__DS_MIN_RTN_I64::Inst_DS__DS_MIN_RTN_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_i64")
    {
    } // Inst_DS__DS_MIN_RTN_I64

    Inst_DS__DS_MIN_RTN_I64::~Inst_DS__DS_MIN_RTN_I64()
    {
    } // ~Inst_DS__DS_MIN_RTN_I64

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MIN_RTN_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MAX_RTN_I64 class methods ---

    Inst_DS__DS_MAX_RTN_I64::Inst_DS__DS_MAX_RTN_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_i64")
    {
    } // Inst_DS__DS_MAX_RTN_I64

    Inst_DS__DS_MAX_RTN_I64::~Inst_DS__DS_MAX_RTN_I64()
    {
    } // ~Inst_DS__DS_MAX_RTN_I64

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MAX_RTN_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MIN_RTN_U64 class methods ---

    Inst_DS__DS_MIN_RTN_U64::Inst_DS__DS_MIN_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_u64")
    {
    } // Inst_DS__DS_MIN_RTN_U64

    Inst_DS__DS_MIN_RTN_U64::~Inst_DS__DS_MIN_RTN_U64()
    {
    } // ~Inst_DS__DS_MIN_RTN_U64

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MIN_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MAX_RTN_U64 class methods ---

    Inst_DS__DS_MAX_RTN_U64::Inst_DS__DS_MAX_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_u64")
    {
    } // Inst_DS__DS_MAX_RTN_U64

    Inst_DS__DS_MAX_RTN_U64::~Inst_DS__DS_MAX_RTN_U64()
    {
    } // ~Inst_DS__DS_MAX_RTN_U64

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MAX_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_AND_RTN_B64 class methods ---

    Inst_DS__DS_AND_RTN_B64::Inst_DS__DS_AND_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_rtn_b64")
    {
    } // Inst_DS__DS_AND_RTN_B64

    Inst_DS__DS_AND_RTN_B64::~Inst_DS__DS_AND_RTN_B64()
    {
    } // ~Inst_DS__DS_AND_RTN_B64

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_AND_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_OR_RTN_B64 class methods ---

    Inst_DS__DS_OR_RTN_B64::Inst_DS__DS_OR_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_rtn_b64")
    {
    } // Inst_DS__DS_OR_RTN_B64

    Inst_DS__DS_OR_RTN_B64::~Inst_DS__DS_OR_RTN_B64()
    {
    } // ~Inst_DS__DS_OR_RTN_B64

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_OR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_XOR_RTN_B64 class methods ---

    Inst_DS__DS_XOR_RTN_B64::Inst_DS__DS_XOR_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_rtn_b64")
    {
    } // Inst_DS__DS_XOR_RTN_B64

    Inst_DS__DS_XOR_RTN_B64::~Inst_DS__DS_XOR_RTN_B64()
    {
    } // ~Inst_DS__DS_XOR_RTN_B64

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_XOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MSKOR_RTN_B64 class methods ---

    Inst_DS__DS_MSKOR_RTN_B64::Inst_DS__DS_MSKOR_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_rtn_b64")
    {
    } // Inst_DS__DS_MSKOR_RTN_B64

    Inst_DS__DS_MSKOR_RTN_B64::~Inst_DS__DS_MSKOR_RTN_B64()
    {
    } // ~Inst_DS__DS_MSKOR_RTN_B64

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    // Masked dword OR, D0 contains the mask and D1 contains the new value.
    void
    Inst_DS__DS_MSKOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_WRXCHG_RTN_B64 class methods ---

    Inst_DS__DS_WRXCHG_RTN_B64::Inst_DS__DS_WRXCHG_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg_rtn_b64")
    {
    } // Inst_DS__DS_WRXCHG_RTN_B64

    Inst_DS__DS_WRXCHG_RTN_B64::~Inst_DS__DS_WRXCHG_RTN_B64()
    {
    } // ~Inst_DS__DS_WRXCHG_RTN_B64

    // --- description from .arch file ---
    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    // Write-exchange operation.
    void
    Inst_DS__DS_WRXCHG_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_WRXCHG2_RTN_B64 class methods ---

    Inst_DS__DS_WRXCHG2_RTN_B64::Inst_DS__DS_WRXCHG2_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2_rtn_b64")
    {
    } // Inst_DS__DS_WRXCHG2_RTN_B64

    Inst_DS__DS_WRXCHG2_RTN_B64::~Inst_DS__DS_WRXCHG2_RTN_B64()
    {
    } // ~Inst_DS__DS_WRXCHG2_RTN_B64

    // --- description from .arch file ---
    // Write-exchange 2 separate qwords.
    void
    Inst_DS__DS_WRXCHG2_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_WRXCHG2ST64_RTN_B64 class methods ---

    Inst_DS__DS_WRXCHG2ST64_RTN_B64::Inst_DS__DS_WRXCHG2ST64_RTN_B64(
          InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b64")
    {
    } // Inst_DS__DS_WRXCHG2ST64_RTN_B64

    Inst_DS__DS_WRXCHG2ST64_RTN_B64::~Inst_DS__DS_WRXCHG2ST64_RTN_B64()
    {
    } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B64

    // --- description from .arch file ---
    // Write-exchange 2 qwords with a stride of 64 qwords.
    void
    Inst_DS__DS_WRXCHG2ST64_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_CMPST_RTN_B64 class methods ---

    Inst_DS__DS_CMPST_RTN_B64::Inst_DS__DS_CMPST_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_b64")
    {
    } // Inst_DS__DS_CMPST_RTN_B64

    Inst_DS__DS_CMPST_RTN_B64::~Inst_DS__DS_CMPST_RTN_B64()
    {
    } // ~Inst_DS__DS_CMPST_RTN_B64

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Compare and store.
    // Caution, the order of src and cmp are the *opposite* of the
    // --- BUFFER_ATOMIC_CMPSWAP_X2 opcode.
    void
    Inst_DS__DS_CMPST_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_CMPST_RTN_F64 class methods ---

    Inst_DS__DS_CMPST_RTN_F64::Inst_DS__DS_CMPST_RTN_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_CMPST_RTN_F64

    Inst_DS__DS_CMPST_RTN_F64::~Inst_DS__DS_CMPST_RTN_F64()
    {
    } // ~Inst_DS__DS_CMPST_RTN_F64

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Floating point compare and store that handles NaN/INF/denormal values.
    // Caution, the order of src and cmp are the *opposite* of the
    // --- BUFFER_ATOMIC_FCMPSWAP_X2 opcode.
    void
    Inst_DS__DS_CMPST_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MIN_RTN_F64 class methods ---

    Inst_DS__DS_MIN_RTN_F64::Inst_DS__DS_MIN_RTN_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MIN_RTN_F64

    Inst_DS__DS_MIN_RTN_F64::~Inst_DS__DS_MIN_RTN_F64()
    {
    } // ~Inst_DS__DS_MIN_RTN_F64

    // --- description from .arch file ---
    // 64b.
    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    // Floating point minimum that handles NaN/INF/denormal values.
    // Note that this opcode is slightly more general-purpose than
    // --- BUFFER_ATOMIC_FMIN_X2.
    void
    Inst_DS__DS_MIN_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MAX_RTN_F64 class methods ---

    Inst_DS__DS_MAX_RTN_F64::Inst_DS__DS_MAX_RTN_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MAX_RTN_F64

    Inst_DS__DS_MAX_RTN_F64::~Inst_DS__DS_MAX_RTN_F64()
    {
    } // ~Inst_DS__DS_MAX_RTN_F64

    // --- description from .arch file ---
    // 64b.
    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    // Floating point maximum that handles NaN/INF/denormal values.
    // Note that this opcode is slightly more general-purpose than
    // --- BUFFER_ATOMIC_FMAX_X2.
- void - Inst_DS__DS_MAX_RTN_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_READ_B64 class methods --- - - Inst_DS__DS_READ_B64::Inst_DS__DS_READ_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_b64") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_B64 - - Inst_DS__DS_READ_B64::~Inst_DS__DS_READ_B64() - { - } // ~Inst_DS__DS_READ_B64 - - // --- description from .arch file --- - // RETURN_DATA = MEM[ADDR]. - // Read 1 qword. - void - Inst_DS__DS_READ_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU64 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } // completeAcc - // --- Inst_DS__DS_READ2_B64 class methods --- - - Inst_DS__DS_READ2_B64::Inst_DS__DS_READ2_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read2_b64") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ2_B64 - - Inst_DS__DS_READ2_B64::~Inst_DS__DS_READ2_B64() - { - } // ~Inst_DS__DS_READ2_B64 - - // --- description from .arch file --- - // 
RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8]; - // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8]. - // Read 2 qwords. - void - Inst_DS__DS_READ2_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ2_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0 * 8; - Addr offset1 = instData.OFFSET1 * 8; - - initDualMemRead(gpuDynInst, offset0, offset1); - } // initiateAcc - - void - Inst_DS__DS_READ2_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU64 vdst0(gpuDynInst, extData.VDST); - VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]; - } - } - - vdst0.write(); - vdst1.write(); - } // completeAcc - // --- Inst_DS__DS_READ2ST64_B64 class methods --- - - Inst_DS__DS_READ2ST64_B64::Inst_DS__DS_READ2ST64_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read2st64_b64") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ2ST64_B64 - - Inst_DS__DS_READ2ST64_B64::~Inst_DS__DS_READ2ST64_B64() - { - } // ~Inst_DS__DS_READ2ST64_B64 - - // --- description from .arch file --- - // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8 * 64]; - // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8 * 64]. - // Read 2 qwords. 
- void - Inst_DS__DS_READ2ST64_B64::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = (instData.OFFSET0 * 8 * 64); - Addr offset1 = (instData.OFFSET1 * 8 * 64); - - initDualMemRead(gpuDynInst, offset0, offset1); - } - - void - Inst_DS__DS_READ2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU64 vdst0(gpuDynInst, extData.VDST); - VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]; - } - } - - vdst0.write(); - vdst1.write(); - } - // --- Inst_DS__DS_CONDXCHG32_RTN_B64 class methods --- - - Inst_DS__DS_CONDXCHG32_RTN_B64::Inst_DS__DS_CONDXCHG32_RTN_B64( - InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_condxchg32_rtn_b64") - { - } // Inst_DS__DS_CONDXCHG32_RTN_B64 - - Inst_DS__DS_CONDXCHG32_RTN_B64::~Inst_DS__DS_CONDXCHG32_RTN_B64() - { - } // ~Inst_DS__DS_CONDXCHG32_RTN_B64 - - // --- description from .arch file --- - // Conditional write exchange. 
    void
    Inst_DS__DS_CONDXCHG32_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_ADD_SRC2_U32 class methods ---

    Inst_DS__DS_ADD_SRC2_U32::Inst_DS__DS_ADD_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_src2_u32")
    {
    } // Inst_DS__DS_ADD_SRC2_U32

    Inst_DS__DS_ADD_SRC2_U32::~Inst_DS__DS_ADD_SRC2_U32()
    {
    } // ~Inst_DS__DS_ADD_SRC2_U32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] + MEM[B].
    void
    Inst_DS__DS_ADD_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_SUB_SRC2_U32 class methods ---

    Inst_DS__DS_SUB_SRC2_U32::Inst_DS__DS_SUB_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_src2_u32")
    {
    } // Inst_DS__DS_SUB_SRC2_U32

    Inst_DS__DS_SUB_SRC2_U32::~Inst_DS__DS_SUB_SRC2_U32()
    {
    } // ~Inst_DS__DS_SUB_SRC2_U32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] - MEM[B].
    void
    Inst_DS__DS_SUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_RSUB_SRC2_U32 class methods ---

    Inst_DS__DS_RSUB_SRC2_U32::Inst_DS__DS_RSUB_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_src2_u32")
    {
    } // Inst_DS__DS_RSUB_SRC2_U32

    Inst_DS__DS_RSUB_SRC2_U32::~Inst_DS__DS_RSUB_SRC2_U32()
    {
    } // ~Inst_DS__DS_RSUB_SRC2_U32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B] - MEM[A].
    void
    Inst_DS__DS_RSUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_INC_SRC2_U32 class methods ---

    Inst_DS__DS_INC_SRC2_U32::Inst_DS__DS_INC_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_src2_u32")
    {
    } // Inst_DS__DS_INC_SRC2_U32

    Inst_DS__DS_INC_SRC2_U32::~Inst_DS__DS_INC_SRC2_U32()
    {
    } // ~Inst_DS__DS_INC_SRC2_U32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
    void
    Inst_DS__DS_INC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_DEC_SRC2_U32 class methods ---

    Inst_DS__DS_DEC_SRC2_U32::Inst_DS__DS_DEC_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_src2_u32")
    {
    } // Inst_DS__DS_DEC_SRC2_U32

    Inst_DS__DS_DEC_SRC2_U32::~Inst_DS__DS_DEC_SRC2_U32()
    {
    } // ~Inst_DS__DS_DEC_SRC2_U32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
    // Uint decrement.
    void
    Inst_DS__DS_DEC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MIN_SRC2_I32 class methods ---

    Inst_DS__DS_MIN_SRC2_I32::Inst_DS__DS_MIN_SRC2_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_i32")
    {
    } // Inst_DS__DS_MIN_SRC2_I32

    Inst_DS__DS_MIN_SRC2_I32::~Inst_DS__DS_MIN_SRC2_I32()
    {
    } // ~Inst_DS__DS_MIN_SRC2_I32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MAX_SRC2_I32 class methods ---

    Inst_DS__DS_MAX_SRC2_I32::Inst_DS__DS_MAX_SRC2_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_i32")
    {
    } // Inst_DS__DS_MAX_SRC2_I32

    Inst_DS__DS_MAX_SRC2_I32::~Inst_DS__DS_MAX_SRC2_I32()
    {
    } // ~Inst_DS__DS_MAX_SRC2_I32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MIN_SRC2_U32 class methods ---

    Inst_DS__DS_MIN_SRC2_U32::Inst_DS__DS_MIN_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_u32")
    {
    } // Inst_DS__DS_MIN_SRC2_U32

    Inst_DS__DS_MIN_SRC2_U32::~Inst_DS__DS_MIN_SRC2_U32()
    {
    } // ~Inst_DS__DS_MIN_SRC2_U32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MAX_SRC2_U32 class methods ---

    Inst_DS__DS_MAX_SRC2_U32::Inst_DS__DS_MAX_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_u32")
    {
    } // Inst_DS__DS_MAX_SRC2_U32

    Inst_DS__DS_MAX_SRC2_U32::~Inst_DS__DS_MAX_SRC2_U32()
    {
    } // ~Inst_DS__DS_MAX_SRC2_U32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_AND_SRC2_B32 class methods ---

    Inst_DS__DS_AND_SRC2_B32::Inst_DS__DS_AND_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_src2_b32")
    {
    } // Inst_DS__DS_AND_SRC2_B32

    Inst_DS__DS_AND_SRC2_B32::~Inst_DS__DS_AND_SRC2_B32()
    {
    } // ~Inst_DS__DS_AND_SRC2_B32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] & MEM[B].
    void
    Inst_DS__DS_AND_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_OR_SRC2_B32 class methods ---

    Inst_DS__DS_OR_SRC2_B32::Inst_DS__DS_OR_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_src2_b32")
    {
    } // Inst_DS__DS_OR_SRC2_B32

    Inst_DS__DS_OR_SRC2_B32::~Inst_DS__DS_OR_SRC2_B32()
    {
    } // ~Inst_DS__DS_OR_SRC2_B32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] | MEM[B].
    void
    Inst_DS__DS_OR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_XOR_SRC2_B32 class methods ---

    Inst_DS__DS_XOR_SRC2_B32::Inst_DS__DS_XOR_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_src2_b32")
    {
    } // Inst_DS__DS_XOR_SRC2_B32

    Inst_DS__DS_XOR_SRC2_B32::~Inst_DS__DS_XOR_SRC2_B32()
    {
    } // ~Inst_DS__DS_XOR_SRC2_B32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] ^ MEM[B].
    void
    Inst_DS__DS_XOR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_WRITE_SRC2_B32 class methods ---

    Inst_DS__DS_WRITE_SRC2_B32::Inst_DS__DS_WRITE_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_src2_b32")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_SRC2_B32

    Inst_DS__DS_WRITE_SRC2_B32::~Inst_DS__DS_WRITE_SRC2_B32()
    {
    } // ~Inst_DS__DS_WRITE_SRC2_B32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B].
    // Write dword.
    void
    Inst_DS__DS_WRITE_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MIN_SRC2_F32 class methods ---

    Inst_DS__DS_MIN_SRC2_F32::Inst_DS__DS_MIN_SRC2_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MIN_SRC2_F32

    Inst_DS__DS_MIN_SRC2_F32::~Inst_DS__DS_MIN_SRC2_F32()
    {
    } // ~Inst_DS__DS_MIN_SRC2_F32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
    // Float, handles NaN/INF/denorm.
    void
    Inst_DS__DS_MIN_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MAX_SRC2_F32 class methods ---

    Inst_DS__DS_MAX_SRC2_F32::Inst_DS__DS_MAX_SRC2_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MAX_SRC2_F32

    Inst_DS__DS_MAX_SRC2_F32::~Inst_DS__DS_MAX_SRC2_F32()
    {
    } // ~Inst_DS__DS_MAX_SRC2_F32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
    // Float, handles NaN/INF/denorm.
    void
    Inst_DS__DS_MAX_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_ADD_SRC2_F32 class methods ---

    Inst_DS__DS_ADD_SRC2_F32::Inst_DS__DS_ADD_SRC2_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_src2_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_ADD_SRC2_F32

    Inst_DS__DS_ADD_SRC2_F32::~Inst_DS__DS_ADD_SRC2_F32()
    {
    } // ~Inst_DS__DS_ADD_SRC2_F32

    // --- description from .arch file ---
    // 32b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B] + MEM[A].
    // Float, handles NaN/INF/denorm.
    void
    Inst_DS__DS_ADD_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_GWS_SEMA_RELEASE_ALL class methods ---

    Inst_DS__DS_GWS_SEMA_RELEASE_ALL::Inst_DS__DS_GWS_SEMA_RELEASE_ALL(
          InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_release_all")
    {
    } // Inst_DS__DS_GWS_SEMA_RELEASE_ALL

    Inst_DS__DS_GWS_SEMA_RELEASE_ALL::~Inst_DS__DS_GWS_SEMA_RELEASE_ALL()
    {
    } // ~Inst_DS__DS_GWS_SEMA_RELEASE_ALL

    // --- description from .arch file ---
    // GDS Only: The GWS resource (rid) indicated will process this opcode by
    // updating the counter and labeling the specified resource as a semaphore.
    // //Determine the GWS resource to work on
    // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
    // //Incr the state counter of the resource
    // state.counter[rid] = state.wave_in_queue;
    // state.type = SEMAPHORE;
    // return rd_done; //release calling wave
    // This action will release ALL queued waves; it Will have no effect if no
    // --- waves are present.
    void
    Inst_DS__DS_GWS_SEMA_RELEASE_ALL::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_GWS_INIT class methods ---

    Inst_DS__DS_GWS_INIT::Inst_DS__DS_GWS_INIT(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_init")
    {
    } // Inst_DS__DS_GWS_INIT

    Inst_DS__DS_GWS_INIT::~Inst_DS__DS_GWS_INIT()
    {
    } // ~Inst_DS__DS_GWS_INIT

    // --- description from .arch file ---
    // GDS Only: Initialize a barrier or semaphore resource.
    // //Determine the GWS resource to work on
    // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
    // //Get the value to use in init
    // index = find_first_valid(vector mask)
    // value = DATA[thread: index]
    // //Set the state of the resource
    // state.counter[rid] = lsb(value); //limit #waves
    // state.flag[rid] = 0;
    // return rd_done; //release calling wave
    void
    Inst_DS__DS_GWS_INIT::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_GWS_SEMA_V class methods ---

    Inst_DS__DS_GWS_SEMA_V::Inst_DS__DS_GWS_SEMA_V(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_v")
    {
    } // Inst_DS__DS_GWS_SEMA_V

    Inst_DS__DS_GWS_SEMA_V::~Inst_DS__DS_GWS_SEMA_V()
    {
    } // ~Inst_DS__DS_GWS_SEMA_V

    // --- description from .arch file ---
    // GDS Only: The GWS resource indicated will process this opcode by
    // updating the counter and labeling the resource as a semaphore.
    // //Determine the GWS resource to work on
    // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
    // //Incr the state counter of the resource
    // state.counter[rid]++;
    // state.type = SEMAPHORE;
    // return rd_done; //release calling wave
    // This action will release one waved if any are queued in this resource.
    void
    Inst_DS__DS_GWS_SEMA_V::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_GWS_SEMA_BR class methods ---

    Inst_DS__DS_GWS_SEMA_BR::Inst_DS__DS_GWS_SEMA_BR(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_br")
    {
    } // Inst_DS__DS_GWS_SEMA_BR

    Inst_DS__DS_GWS_SEMA_BR::~Inst_DS__DS_GWS_SEMA_BR()
    {
    } // ~Inst_DS__DS_GWS_SEMA_BR

    // --- description from .arch file ---
    // GDS Only: The GWS resource indicated will process this opcode by
    // updating the counter by the bulk release delivered count and labeling
    // the resource as a semaphore.
    // //Determine the GWS resource to work on
    // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
    // index = find first valid (vector mask)
    // count = DATA[thread: index];
    // //Add count to the resource state counter
    // state.counter[rid] += count;
    // state.type = SEMAPHORE;
    // return rd_done; //release calling wave
    // This action will release count number of waves, immediately if queued,
    // or as they arrive from the noted resource.
    void
    Inst_DS__DS_GWS_SEMA_BR::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_GWS_SEMA_P class methods ---

    Inst_DS__DS_GWS_SEMA_P::Inst_DS__DS_GWS_SEMA_P(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_p")
    {
    } // Inst_DS__DS_GWS_SEMA_P

    Inst_DS__DS_GWS_SEMA_P::~Inst_DS__DS_GWS_SEMA_P()
    {
    } // ~Inst_DS__DS_GWS_SEMA_P

    // --- description from .arch file ---
    // GDS Only: The GWS resource indicated will process this opcode by
    // queueing it until counter enables a release and then decrementing the
    // counter of the resource as a semaphore.
    // //Determine the GWS resource to work on
    // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + offset0[5:0];
    // state.type = SEMAPHORE;
    // ENQUEUE until(state[rid].counter > 0)
    // state[rid].counter--;
    // return rd_done
    void
    Inst_DS__DS_GWS_SEMA_P::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_GWS_BARRIER class methods ---

    Inst_DS__DS_GWS_BARRIER::Inst_DS__DS_GWS_BARRIER(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_barrier")
    {
    } // Inst_DS__DS_GWS_BARRIER

    Inst_DS__DS_GWS_BARRIER::~Inst_DS__DS_GWS_BARRIER()
    {
    } // ~Inst_DS__DS_GWS_BARRIER

    // --- description from .arch file ---
    // GDS Only: The GWS resource indicated will process this opcode by
    // queueing it until barrier is satisfied. The number of waves needed is
    // passed in as DATA of first valid thread.
    // //Determine the GWS resource to work on
    // rid[5:0] = SH_SX_EXPCMD.gds_base[5:0] + OFFSET0[5:0];
    // index = find first valid (vector mask);
    // value = DATA[thread: index];
    // // Input Decision Machine
    // state.type[rid] = BARRIER;
    // if(state[rid].counter <= 0) {
    // thread[rid].flag = state[rid].flag;
    // ENQUEUE;
    // state[rid].flag = !state.flag;
    // state[rid].counter = value;
    // return rd_done;
    // } else {
    // state[rid].counter--;
    // thread.flag = state[rid].flag;
    // ENQUEUE;
    // }
    // Since the waves deliver the count for the next barrier, this function
    // can have a different size barrier for each occurrence.
    // // Release Machine
    // if(state.type == BARRIER) {
    // if(state.flag != thread.flag) {
    // return rd_done;
    // }
    // }
    void
    Inst_DS__DS_GWS_BARRIER::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_CONSUME class methods ---

    Inst_DS__DS_CONSUME::Inst_DS__DS_CONSUME(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_consume")
    {
    } // Inst_DS__DS_CONSUME

    Inst_DS__DS_CONSUME::~Inst_DS__DS_CONSUME()
    {
    } // ~Inst_DS__DS_CONSUME

    // --- description from .arch file ---
    // LDS & GDS. Subtract (count_bits(exec_mask)) from the value stored in DS
    // memory at (M0.base + instr_offset). Return the pre-operation value to
    // VGPRs.
    void
    Inst_DS__DS_CONSUME::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_APPEND class methods ---

    Inst_DS__DS_APPEND::Inst_DS__DS_APPEND(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_append")
    {
    } // Inst_DS__DS_APPEND

    Inst_DS__DS_APPEND::~Inst_DS__DS_APPEND()
    {
    } // ~Inst_DS__DS_APPEND

    // --- description from .arch file ---
    // LDS & GDS. Add (count_bits(exec_mask)) to the value stored in DS memory
    // at (M0.base + instr_offset). Return the pre-operation value to VGPRs.
    void
    Inst_DS__DS_APPEND::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_ORDERED_COUNT class methods ---

    Inst_DS__DS_ORDERED_COUNT::Inst_DS__DS_ORDERED_COUNT(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_ordered_count")
    {
    } // Inst_DS__DS_ORDERED_COUNT

    Inst_DS__DS_ORDERED_COUNT::~Inst_DS__DS_ORDERED_COUNT()
    {
    } // ~Inst_DS__DS_ORDERED_COUNT

    // --- description from .arch file ---
    // GDS-only. Add (count_bits(exec_mask)) to one of 4 dedicated
    // ordered-count counters (aka 'packers'). Additional bits of instr.offset
    // field are overloaded to hold packer-id, 'last'.
    void
    Inst_DS__DS_ORDERED_COUNT::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_ADD_SRC2_U64 class methods ---

    Inst_DS__DS_ADD_SRC2_U64::Inst_DS__DS_ADD_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_src2_u64")
    {
    } // Inst_DS__DS_ADD_SRC2_U64

    Inst_DS__DS_ADD_SRC2_U64::~Inst_DS__DS_ADD_SRC2_U64()
    {
    } // ~Inst_DS__DS_ADD_SRC2_U64

    // --- description from .arch file ---
    // 64b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] + MEM[B].
    void
    Inst_DS__DS_ADD_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_SUB_SRC2_U64 class methods ---

    Inst_DS__DS_SUB_SRC2_U64::Inst_DS__DS_SUB_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_src2_u64")
    {
    } // Inst_DS__DS_SUB_SRC2_U64

    Inst_DS__DS_SUB_SRC2_U64::~Inst_DS__DS_SUB_SRC2_U64()
    {
    } // ~Inst_DS__DS_SUB_SRC2_U64

    // --- description from .arch file ---
    // 64b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] - MEM[B].
    void
    Inst_DS__DS_SUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_RSUB_SRC2_U64 class methods ---

    Inst_DS__DS_RSUB_SRC2_U64::Inst_DS__DS_RSUB_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_src2_u64")
    {
    } // Inst_DS__DS_RSUB_SRC2_U64

    Inst_DS__DS_RSUB_SRC2_U64::~Inst_DS__DS_RSUB_SRC2_U64()
    {
    } // ~Inst_DS__DS_RSUB_SRC2_U64

    // --- description from .arch file ---
    // 64b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B] - MEM[A].
    void
    Inst_DS__DS_RSUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_INC_SRC2_U64 class methods ---

    Inst_DS__DS_INC_SRC2_U64::Inst_DS__DS_INC_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_src2_u64")
    {
    } // Inst_DS__DS_INC_SRC2_U64

    Inst_DS__DS_INC_SRC2_U64::~Inst_DS__DS_INC_SRC2_U64()
    {
    } // ~Inst_DS__DS_INC_SRC2_U64

    // --- description from .arch file ---
    // 64b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
    void
    Inst_DS__DS_INC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_DEC_SRC2_U64 class methods ---

    Inst_DS__DS_DEC_SRC2_U64::Inst_DS__DS_DEC_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_src2_u64")
    {
    } // Inst_DS__DS_DEC_SRC2_U64

    Inst_DS__DS_DEC_SRC2_U64::~Inst_DS__DS_DEC_SRC2_U64()
    {
    } // ~Inst_DS__DS_DEC_SRC2_U64

    // --- description from .arch file ---
    // 64b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
    // Uint decrement.
    void
    Inst_DS__DS_DEC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MIN_SRC2_I64 class methods ---

    Inst_DS__DS_MIN_SRC2_I64::Inst_DS__DS_MIN_SRC2_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_i64")
    {
    } // Inst_DS__DS_MIN_SRC2_I64

    Inst_DS__DS_MIN_SRC2_I64::~Inst_DS__DS_MIN_SRC2_I64()
    {
    } // ~Inst_DS__DS_MIN_SRC2_I64

    // --- description from .arch file ---
    // 64b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MAX_SRC2_I64 class methods ---

    Inst_DS__DS_MAX_SRC2_I64::Inst_DS__DS_MAX_SRC2_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_i64")
    {
    } // Inst_DS__DS_MAX_SRC2_I64

    Inst_DS__DS_MAX_SRC2_I64::~Inst_DS__DS_MAX_SRC2_I64()
    {
    } // ~Inst_DS__DS_MAX_SRC2_I64

    // --- description from .arch file ---
    // 64b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MIN_SRC2_U64 class methods ---

    Inst_DS__DS_MIN_SRC2_U64::Inst_DS__DS_MIN_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_u64")
    {
    } // Inst_DS__DS_MIN_SRC2_U64

    Inst_DS__DS_MIN_SRC2_U64::~Inst_DS__DS_MIN_SRC2_U64()
    {
    } // ~Inst_DS__DS_MIN_SRC2_U64

    // --- description from .arch file ---
    // 64b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unimplemented; panics if executed.
        panicUnimplemented();
    } // execute
    // --- Inst_DS__DS_MAX_SRC2_U64 class methods ---

    Inst_DS__DS_MAX_SRC2_U64::Inst_DS__DS_MAX_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_u64")
    {
    } // Inst_DS__DS_MAX_SRC2_U64

    Inst_DS__DS_MAX_SRC2_U64::~Inst_DS__DS_MAX_SRC2_U64()
    {
    } // ~Inst_DS__DS_MAX_SRC2_U64

    // --- description from .arch file ---
    // 64b:
    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // --- {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
- void - Inst_DS__DS_MAX_SRC2_U64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_AND_SRC2_B64 class methods --- - - Inst_DS__DS_AND_SRC2_B64::Inst_DS__DS_AND_SRC2_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_and_src2_b64") - { - } // Inst_DS__DS_AND_SRC2_B64 - - Inst_DS__DS_AND_SRC2_B64::~Inst_DS__DS_AND_SRC2_B64() - { - } // ~Inst_DS__DS_AND_SRC2_B64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[A] & MEM[B]. - void - Inst_DS__DS_AND_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_OR_SRC2_B64 class methods --- - - Inst_DS__DS_OR_SRC2_B64::Inst_DS__DS_OR_SRC2_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_or_src2_b64") - { - } // Inst_DS__DS_OR_SRC2_B64 - - Inst_DS__DS_OR_SRC2_B64::~Inst_DS__DS_OR_SRC2_B64() - { - } // ~Inst_DS__DS_OR_SRC2_B64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[A] | MEM[B]. - void - Inst_DS__DS_OR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_XOR_SRC2_B64 class methods --- - - Inst_DS__DS_XOR_SRC2_B64::Inst_DS__DS_XOR_SRC2_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_xor_src2_b64") - { - } // Inst_DS__DS_XOR_SRC2_B64 - - Inst_DS__DS_XOR_SRC2_B64::~Inst_DS__DS_XOR_SRC2_B64() - { - } // ~Inst_DS__DS_XOR_SRC2_B64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[A] ^ MEM[B]. 
- void - Inst_DS__DS_XOR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_WRITE_SRC2_B64 class methods --- - - Inst_DS__DS_WRITE_SRC2_B64::Inst_DS__DS_WRITE_SRC2_B64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_src2_b64") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_SRC2_B64 - - Inst_DS__DS_WRITE_SRC2_B64::~Inst_DS__DS_WRITE_SRC2_B64() - { - } // ~Inst_DS__DS_WRITE_SRC2_B64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = MEM[B]. - // Write qword. - void - Inst_DS__DS_WRITE_SRC2_B64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MIN_SRC2_F64 class methods --- - - Inst_DS__DS_MIN_SRC2_F64::Inst_DS__DS_MIN_SRC2_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_min_src2_f64") - { - setFlag(F64); - } // Inst_DS__DS_MIN_SRC2_F64 - - Inst_DS__DS_MIN_SRC2_F64::~Inst_DS__DS_MIN_SRC2_F64() - { - } // ~Inst_DS__DS_MIN_SRC2_F64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A]. - // Float, handles NaN/INF/denorm. - void - Inst_DS__DS_MIN_SRC2_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_MAX_SRC2_F64 class methods --- - - Inst_DS__DS_MAX_SRC2_F64::Inst_DS__DS_MAX_SRC2_F64(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_max_src2_f64") - { - setFlag(F64); - } // Inst_DS__DS_MAX_SRC2_F64 - - Inst_DS__DS_MAX_SRC2_F64::~Inst_DS__DS_MAX_SRC2_F64() - { - } // ~Inst_DS__DS_MAX_SRC2_F64 - - // --- description from .arch file --- - // 64b: - // A = ADDR_BASE; - // B = A + 4*(offset1[7] ? {A[31],A[31:17]} : - // --- {offset1[6],offset1[6:0],offset0}); - // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A]. 
- // Float, handles NaN/INF/denorm. - void - Inst_DS__DS_MAX_SRC2_F64::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_DS__DS_WRITE_B96 class methods --- - - Inst_DS__DS_WRITE_B96::Inst_DS__DS_WRITE_B96(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_b96") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_B96 - - Inst_DS__DS_WRITE_B96::~Inst_DS__DS_WRITE_B96() - { - } // ~Inst_DS__DS_WRITE_B96 - - // --- description from .arch file --- - // {MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[95:0]. - // Tri-dword write. - void - Inst_DS__DS_WRITE_B96::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); - ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2); - - addr.read(); - data0.read(); - data1.read(); - data2.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE_B96::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemWrite<3>(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_WRITE_B96::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_DS__DS_WRITE_B128 class methods 
--- - - Inst_DS__DS_WRITE_B128::Inst_DS__DS_WRITE_B128(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_write_b128") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_DS__DS_WRITE_B128 - - Inst_DS__DS_WRITE_B128::~Inst_DS__DS_WRITE_B128() - { - } // ~Inst_DS__DS_WRITE_B128 - - // --- description from .arch file --- - // {MEM[ADDR + 12], MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[127:0]. - // Qword write. - void - Inst_DS__DS_WRITE_B128::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - ConstVecOperandU32 data0(gpuDynInst, extData.DATA0); - ConstVecOperandU32 data1(gpuDynInst, extData.DATA0 + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.DATA0 + 2); - ConstVecOperandU32 data3(gpuDynInst, extData.DATA0 + 3); - - addr.read(); - data0.read(); - data1.read(); - data2.read(); - data3.read(); - - calcAddr(gpuDynInst, addr); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 3] = data3[lane]; - } - } - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_WRITE_B128::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemWrite<4>(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_WRITE_B128::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_DS__DS_READ_B96 class methods --- - - 
Inst_DS__DS_READ_B96::Inst_DS__DS_READ_B96(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_b96") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_B96 - - Inst_DS__DS_READ_B96::~Inst_DS__DS_READ_B96() - { - } // ~Inst_DS__DS_READ_B96 - - // --- description from .arch file --- - // Tri-dword read. - void - Inst_DS__DS_READ_B96::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ_B96::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead<3>(gpuDynInst, offset); - } - - void - Inst_DS__DS_READ_B96::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2]; - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - } - // --- Inst_DS__DS_READ_B128 class methods --- - - Inst_DS__DS_READ_B128::Inst_DS__DS_READ_B128(InFmt_DS *iFmt) - : Inst_DS(iFmt, "ds_read_b128") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_DS__DS_READ_B128 - - Inst_DS__DS_READ_B128::~Inst_DS__DS_READ_B128() - { - } // ~Inst_DS__DS_READ_B128 - - // --- description from .arch file --- - // Qword read. 
- void - Inst_DS__DS_READ_B128::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set( - gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24))); - ConstVecOperandU32 addr(gpuDynInst, extData.ADDR); - - addr.read(); - - calcAddr(gpuDynInst, addr); - - gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_DS__DS_READ_B128::initiateAcc(GPUDynInstPtr gpuDynInst) - { - Addr offset0 = instData.OFFSET0; - Addr offset1 = instData.OFFSET1; - Addr offset = (offset1 << 8) | offset0; - - initMemRead<4>(gpuDynInst, offset); - } // initiateAcc - - void - Inst_DS__DS_READ_B128::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); - VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2]; - vdst3[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 3]; - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - vdst3.write(); - } // completeAcc - // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_X class methods --- - - Inst_MUBUF__BUFFER_LOAD_FORMAT_X - ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_format_x") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_FORMAT_X - - Inst_MUBUF__BUFFER_LOAD_FORMAT_X::~Inst_MUBUF__BUFFER_LOAD_FORMAT_X() - { - } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X - - // --- description from .arch file --- - // Untyped buffer load 1 
    // dword with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        // Format-converting buffer accesses are not implemented.
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XY class methods ---

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY

    // --- description from .arch file ---
    // Untyped buffer load 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ class methods ---

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ

    // --- description from .arch file ---
    // Untyped buffer load 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW class methods ---

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW

    // --- description from .arch file ---
    // Untyped buffer load 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_X class methods ---

    Inst_MUBUF__BUFFER_STORE_FORMAT_X
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_X

    Inst_MUBUF__BUFFER_STORE_FORMAT_X::~Inst_MUBUF__BUFFER_STORE_FORMAT_X()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_X

    // --- description from .arch file ---
    // Untyped buffer store 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XY class methods ---

    Inst_MUBUF__BUFFER_STORE_FORMAT_XY
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XY

    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::~Inst_MUBUF__BUFFER_STORE_FORMAT_XY()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY

    // --- description from .arch file ---
    // Untyped buffer store 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ class methods ---

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ

    // --- description from .arch file ---
    // Untyped buffer store 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW class methods ---

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW

    // --- description from .arch file ---
    // Untyped buffer store 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X class methods ---

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X

    // --- description from .arch file ---
    // Untyped buffer load 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY class methods ---

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY

    // --- description from .arch file ---
    // Untyped buffer load 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ class methods ---

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ

    // --- description from .arch file ---
    // Untyped buffer load 3 dwords with format conversion.
- void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW class methods --- - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW - ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyzw") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW - - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW - ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW() - { - } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW - - // --- description from .arch file --- - // Untyped buffer load 4 dwords with format conversion. - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X class methods --- - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X - ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_format_d16_x") - { - setFlag(Store); - } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X - - Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X - ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X() - { - } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X - - // --- description from .arch file --- - // Untyped buffer store 1 dword with format conversion. 
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        // Format-converting buffer accesses are not implemented.
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY class methods ---

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY

    // --- description from .arch file ---
    // Untyped buffer store 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ class methods ---

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ

    // --- description from .arch file ---
    // Untyped buffer store 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW class methods ---

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW

    // --- description from .arch file ---
    // Untyped buffer store 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_UBYTE class methods ---

    Inst_MUBUF__BUFFER_LOAD_UBYTE
        ::Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_ubyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        // When the LDS bit is set the loaded data is routed to LDS instead
        // of a VGPR, so classify the access accordingly.
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_UBYTE

    Inst_MUBUF__BUFFER_LOAD_UBYTE::~Inst_MUBUF__BUFFER_LOAD_UBYTE()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_UBYTE

    // --- description from .arch file ---
    // Untyped buffer load unsigned byte (zero extend to VGPR destination).
- void - Inst_MUBUF__BUFFER_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDATA); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst[lane] = (VecElemU32)((reinterpret_cast( - gpuDynInst->d_data))[lane]); - } else { - vdst[lane] = 0; - } - } - } - - vdst.write(); - } // execute - - // --- Inst_MUBUF__BUFFER_LOAD_SBYTE class methods --- - - Inst_MUBUF__BUFFER_LOAD_SBYTE - ::Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, 
          "buffer_load_sbyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_SBYTE

    Inst_MUBUF__BUFFER_LOAD_SBYTE::~Inst_MUBUF__BUFFER_LOAD_SBYTE()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_SBYTE

    // --- description from .arch file ---
    // Untyped buffer load signed byte (sign extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        // Signed-byte buffer loads are not implemented.
        panicUnimplemented();
    } // execute

    void
    Inst_MUBUF__BUFFER_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_LOAD_USHORT class methods ---

    Inst_MUBUF__BUFFER_LOAD_USHORT
        ::Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_ushort")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        // When the LDS bit is set the loaded data is routed to LDS instead
        // of a VGPR, so classify the access accordingly.
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_USHORT

    Inst_MUBUF__BUFFER_LOAD_USHORT::~Inst_MUBUF__BUFFER_LOAD_USHORT()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_USHORT

    // --- description from .arch file ---
    // Untyped buffer load unsigned short (zero extend to VGPR destination).
- void - Inst_MUBUF__BUFFER_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDATA); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst[lane] = (VecElemU32)((reinterpret_cast( - gpuDynInst->d_data))[lane]); - } else { - vdst[lane] = 0; - } - } - } - - vdst.write(); - } // execute - - // --- Inst_MUBUF__BUFFER_LOAD_SSHORT class methods --- - - Inst_MUBUF__BUFFER_LOAD_SSHORT - ::Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, 
"buffer_load_sshort") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_LOAD_SSHORT - - Inst_MUBUF__BUFFER_LOAD_SSHORT::~Inst_MUBUF__BUFFER_LOAD_SSHORT() - { - } // ~Inst_MUBUF__BUFFER_LOAD_SSHORT - - // --- description from .arch file --- - // Untyped buffer load signed short (sign extend to VGPR destination). - void - Inst_MUBUF__BUFFER_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_LOAD_DWORD class methods --- - - Inst_MUBUF__BUFFER_LOAD_DWORD - ::Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_dword") - { - setFlag(MemoryRef); - setFlag(Load); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_LOAD_DWORD - - Inst_MUBUF__BUFFER_LOAD_DWORD::~Inst_MUBUF__BUFFER_LOAD_DWORD() - { - } // ~Inst_MUBUF__BUFFER_LOAD_DWORD - - // --- description from .arch file --- - // Untyped buffer load dword. 
- void - Inst_MUBUF__BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDATA); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } else { - vdst[lane] = 0; - } - } - } - - vdst.write(); - } // completeAcc - // --- Inst_MUBUF__BUFFER_LOAD_DWORDX2 class methods --- - - Inst_MUBUF__BUFFER_LOAD_DWORDX2 - ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, 
"buffer_load_dwordx2") - { - setFlag(MemoryRef); - setFlag(Load); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_LOAD_DWORDX2 - - Inst_MUBUF__BUFFER_LOAD_DWORDX2::~Inst_MUBUF__BUFFER_LOAD_DWORDX2() - { - } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX2 - - // --- description from .arch file --- - // Untyped buffer load 2 dwords. - void - Inst_MUBUF__BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<2>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDATA); - VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 
1); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 2 + 1]; - } else { - vdst0[lane] = 0; - vdst1[lane] = 0; - } - } - } - - vdst0.write(); - vdst1.write(); - } // completeAcc - // --- Inst_MUBUF__BUFFER_LOAD_DWORDX3 class methods --- - - Inst_MUBUF__BUFFER_LOAD_DWORDX3 - ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_dwordx3") - { - setFlag(MemoryRef); - setFlag(Load); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_LOAD_DWORDX3 - - Inst_MUBUF__BUFFER_LOAD_DWORDX3::~Inst_MUBUF__BUFFER_LOAD_DWORDX3() - { - } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX3 - - // --- description from .arch file --- - // Untyped buffer load 3 dwords. - void - Inst_MUBUF__BUFFER_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, 
rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<3>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDATA); - VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 2]; - } else { - vdst0[lane] = 0; - vdst1[lane] = 0; - vdst2[lane] = 0; - } - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - } // completeAcc - // --- Inst_MUBUF__BUFFER_LOAD_DWORDX4 class methods --- - - Inst_MUBUF__BUFFER_LOAD_DWORDX4 - ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_load_dwordx4") - { - setFlag(MemoryRef); - setFlag(Load); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_LOAD_DWORDX4 - - Inst_MUBUF__BUFFER_LOAD_DWORDX4::~Inst_MUBUF__BUFFER_LOAD_DWORDX4() - { - } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX4 - - // --- description from .arch file --- - // Untyped buffer load 4 dwords. 
- void - Inst_MUBUF__BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - - rsrcDesc.read(); - offset.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<4>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDATA); - VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2); - VecOperandU32 vdst3(gpuDynInst, extData.VDATA + 3); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - if (!oobMask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1]; - 
vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2]; - vdst3[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 3]; - } else { - vdst0[lane] = 0; - vdst1[lane] = 0; - vdst2[lane] = 0; - vdst3[lane] = 0; - } - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - vdst3.write(); - } // completeAcc - // --- Inst_MUBUF__BUFFER_STORE_BYTE class methods --- - - Inst_MUBUF__BUFFER_STORE_BYTE - ::Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_byte") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_BYTE - - Inst_MUBUF__BUFFER_STORE_BYTE::~Inst_MUBUF__BUFFER_STORE_BYTE() - { - } // ~Inst_MUBUF__BUFFER_STORE_BYTE - - // --- description from .arch file --- - // Untyped buffer store byte. - void - Inst_MUBUF__BUFFER_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandI8 data(gpuDynInst, extData.VDATA); - - rsrcDesc.read(); - offset.read(); - data.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - 
calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - } // execute - - void - Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_STORE_SHORT class methods --- - - Inst_MUBUF__BUFFER_STORE_SHORT - ::Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_short") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_SHORT - - Inst_MUBUF__BUFFER_STORE_SHORT::~Inst_MUBUF__BUFFER_STORE_SHORT() - { - } // ~Inst_MUBUF__BUFFER_STORE_SHORT - - // --- description from .arch file --- - // Untyped buffer store short. 
- void - Inst_MUBUF__BUFFER_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandI16 data(gpuDynInst, extData.VDATA); - - rsrcDesc.read(); - offset.read(); - data.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - } // execute - - void - Inst_MUBUF__BUFFER_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MUBUF__BUFFER_STORE_DWORD class methods --- - - Inst_MUBUF__BUFFER_STORE_DWORD:: - Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_dword") - { - 
setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_DWORD - - Inst_MUBUF__BUFFER_STORE_DWORD::~Inst_MUBUF__BUFFER_STORE_DWORD() - { - } // ~Inst_MUBUF__BUFFER_STORE_DWORD - - // --- description from .arch file --- - // Untyped buffer store dword. - void - Inst_MUBUF__BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandU32 data(gpuDynInst, extData.VDATA); - - rsrcDesc.read(); - offset.read(); - data.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - } // execute - - void - Inst_MUBUF__BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - 
initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_MUBUF__BUFFER_STORE_DWORDX2 class methods --- - - Inst_MUBUF__BUFFER_STORE_DWORDX2 - ::Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_dwordx2") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_DWORDX2 - - Inst_MUBUF__BUFFER_STORE_DWORDX2::~Inst_MUBUF__BUFFER_STORE_DWORDX2() - { - } // ~Inst_MUBUF__BUFFER_STORE_DWORDX2 - - // --- description from .arch file --- - // Untyped buffer store 2 dwords. - void - Inst_MUBUF__BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); - ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); - - rsrcDesc.read(); - offset.read(); - data0.read(); - data1.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - 
addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane * 4] - = data0[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 1] - = data1[lane]; - } - } - } // execute - - void - Inst_MUBUF__BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<2>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_MUBUF__BUFFER_STORE_DWORDX3 class methods --- - - Inst_MUBUF__BUFFER_STORE_DWORDX3 - ::Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_dwordx3") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_DWORDX3 - - Inst_MUBUF__BUFFER_STORE_DWORDX3::~Inst_MUBUF__BUFFER_STORE_DWORDX3() - { - } // ~Inst_MUBUF__BUFFER_STORE_DWORDX3 - - // --- description from .arch file --- - // Untyped buffer store 3 dwords. 
- void - Inst_MUBUF__BUFFER_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); - ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2); - - rsrcDesc.read(); - offset.read(); - data0.read(); - data1.read(); - data2.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane * 4] - = data0[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 1] - = data1[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 2] - = data2[lane]; - } - } - } // execute - - void - Inst_MUBUF__BUFFER_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<3>(gpuDynInst); - } // initiateAcc - - 
void - Inst_MUBUF__BUFFER_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_MUBUF__BUFFER_STORE_DWORDX4 class methods --- - - Inst_MUBUF__BUFFER_STORE_DWORDX4 - ::Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_dwordx4") - { - setFlag(MemoryRef); - setFlag(Store); - if (instData.LDS) { - setFlag(GroupSegment); - } else { - setFlag(GlobalSegment); - } - } // Inst_MUBUF__BUFFER_STORE_DWORDX4 - - Inst_MUBUF__BUFFER_STORE_DWORDX4::~Inst_MUBUF__BUFFER_STORE_DWORDX4() - { - } // ~Inst_MUBUF__BUFFER_STORE_DWORDX4 - - // --- description from .arch file --- - // Untyped buffer store 4 dwords. - void - Inst_MUBUF__BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); - ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2); - ConstVecOperandU32 data3(gpuDynInst, extData.VDATA + 3); - - rsrcDesc.read(); - offset.read(); - data0.read(); - data1.read(); - data2.read(); - data3.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - 
addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane * 4] - = data0[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 1] - = data1[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 2] - = data2[lane]; - (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 3] - = data3[lane]; - } - } - } // execute - - void - Inst_MUBUF__BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<4>(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_MUBUF__BUFFER_STORE_LDS_DWORD class methods --- - - Inst_MUBUF__BUFFER_STORE_LDS_DWORD - ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_store_lds_dword") - { - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_STORE_LDS_DWORD - - Inst_MUBUF__BUFFER_STORE_LDS_DWORD::~Inst_MUBUF__BUFFER_STORE_LDS_DWORD() - { - } // ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD - - // --- description from .arch file --- - // Store one DWORD from LDS memory to system memory without utilizing - // VGPRs. 
- void - Inst_MUBUF__BUFFER_STORE_LDS_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_WBINVL1 class methods --- - - Inst_MUBUF__BUFFER_WBINVL1::Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_wbinvl1") - { - setFlag(MemoryRef); - setFlag(GPUStaticInst::MemSync); - setFlag(GlobalSegment); - setFlag(MemSync); - } // Inst_MUBUF__BUFFER_WBINVL1 - - Inst_MUBUF__BUFFER_WBINVL1::~Inst_MUBUF__BUFFER_WBINVL1() - { - } // ~Inst_MUBUF__BUFFER_WBINVL1 - - // --- description from .arch file --- - // Write back and invalidate the shader L1. - // Always returns ACK to shader. - void - Inst_MUBUF__BUFFER_WBINVL1::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe. - issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } // execute - - void - Inst_MUBUF__BUFFER_WBINVL1::initiateAcc(GPUDynInstPtr gpuDynInst) - { - // TODO: Fix it for gfx10. Once we have the new gfx10 cache model, we - // need to precisely communicate the writeback-invalidate operation to - // the new gfx10 coalescer rather than sending AcquireRelease markers. - // The SICoalescer would need to be updated appropriately as well. 
- injectGlobalMemFence(gpuDynInst); - } // initiateAcc - void - Inst_MUBUF__BUFFER_WBINVL1::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_MUBUF__BUFFER_WBINVL1_VOL class methods --- - - Inst_MUBUF__BUFFER_WBINVL1_VOL - ::Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF*iFmt) - : Inst_MUBUF(iFmt, "buffer_wbinvl1_vol") { - // This instruction is same as buffer_wbinvl1 instruction except this - // instruction only invalidate L1 shader line with MTYPE SC and GC. - // Since Hermes L1 (TCP) do not differentiate between its cache lines, - // this instruction currently behaves (and implemented ) exactly like - // buffer_wbinvl1 instruction. - setFlag(MemoryRef); - setFlag(GPUStaticInst::MemSync); - setFlag(GlobalSegment); - setFlag(MemSync); - } // Inst_MUBUF__BUFFER_WBINVL1_VOL - - Inst_MUBUF__BUFFER_WBINVL1_VOL::~Inst_MUBUF__BUFFER_WBINVL1_VOL() - { - } // ~Inst_MUBUF__BUFFER_WBINVL1_VOL - - // --- description from .arch file --- - // Write back and invalidate the shader L1 only for lines that are marked - // --- volatile. - // Always returns ACK to shader. - void - Inst_MUBUF__BUFFER_WBINVL1_VOL::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - if (gpuDynInst->executedAs() == enums::SC_GLOBAL) { - gpuDynInst->computeUnit()->globalMemoryPipe. 
- issueRequest(gpuDynInst); - } else { - fatal("Unsupported scope for flat instruction.\n"); - } - } // execute - void - Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst) - { - injectGlobalMemFence(gpuDynInst); - } // initiateAcc - void - Inst_MUBUF__BUFFER_WBINVL1_VOL::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_MUBUF__BUFFER_ATOMIC_SWAP class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SWAP - ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_swap") - { - setFlag(AtomicExch); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SWAP - - Inst_MUBUF__BUFFER_ATOMIC_SWAP::~Inst_MUBUF__BUFFER_ATOMIC_SWAP() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA; - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP - ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap") - { - setFlag(AtomicCAS); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP - - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // src = DATA[0]; - // cmp = DATA[1]; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0] = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); - ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); - ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); - ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); - ConstVecOperandU32 src(gpuDynInst, extData.VDATA); - ConstVecOperandU32 cmp(gpuDynInst, extData.VDATA + 1); - - rsrcDesc.read(); - offset.read(); - src.read(); - cmp.read(); - - int inst_offset = instData.OFFSET; - - if (!instData.IDXEN && !instData.OFFEN) { - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (!instData.IDXEN && instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr0, addr1, rsrcDesc, offset, inst_offset); - } else if (instData.IDXEN && !instData.OFFEN) { - addr0.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } else { - addr0.read(); - addr1.read(); - calcAddr(gpuDynInst, - addr1, addr0, rsrcDesc, offset, inst_offset); - } - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->x_data))[lane] - = src[lane]; - (reinterpret_cast(gpuDynInst->a_data))[lane] - = cmp[lane]; - } - } - - gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); - } // execute - - void - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDATA); - - for (int lane = 0; lane < 
NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - // --- Inst_MUBUF__BUFFER_ATOMIC_ADD class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_ADD - ::Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_add") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_ADD - - Inst_MUBUF__BUFFER_ATOMIC_ADD::~Inst_MUBUF__BUFFER_ATOMIC_ADD() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA; - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_SUB class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SUB - ::Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_sub") - { - setFlag(AtomicSub); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SUB - - Inst_MUBUF__BUFFER_ATOMIC_SUB::~Inst_MUBUF__BUFFER_ATOMIC_SUB() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA; - // RETURN_DATA = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_SMIN class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SMIN - ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_smin") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SMIN - - Inst_MUBUF__BUFFER_ATOMIC_SMIN::~Inst_MUBUF__BUFFER_ATOMIC_SMIN() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_UMIN class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_UMIN - ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_umin") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_UMIN - - Inst_MUBUF__BUFFER_ATOMIC_UMIN::~Inst_MUBUF__BUFFER_ATOMIC_UMIN() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_SMAX class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SMAX - ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_smax") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SMAX - - Inst_MUBUF__BUFFER_ATOMIC_SMAX::~Inst_MUBUF__BUFFER_ATOMIC_SMAX() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_UMAX class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_UMAX - ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_umax") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_UMAX - - Inst_MUBUF__BUFFER_ATOMIC_UMAX::~Inst_MUBUF__BUFFER_ATOMIC_UMAX() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_AND class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_AND - ::Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_and") - { - setFlag(AtomicAnd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_AND - - Inst_MUBUF__BUFFER_ATOMIC_AND::~Inst_MUBUF__BUFFER_ATOMIC_AND() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_AND - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA; - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_OR class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_OR - ::Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_or") - { - setFlag(AtomicOr); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_OR - - Inst_MUBUF__BUFFER_ATOMIC_OR::~Inst_MUBUF__BUFFER_ATOMIC_OR() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_OR - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA; - // RETURN_DATA = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_XOR class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_XOR - ::Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_xor") - { - setFlag(AtomicXor); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_XOR - - Inst_MUBUF__BUFFER_ATOMIC_XOR::~Inst_MUBUF__BUFFER_ATOMIC_XOR() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA; - // RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_INC class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_INC - ::Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_inc") - { - setFlag(AtomicInc); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_INC - - Inst_MUBUF__BUFFER_ATOMIC_INC::~Inst_MUBUF__BUFFER_ATOMIC_INC() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_INC - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_DEC class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_DEC - ::Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_dec") - { - setFlag(AtomicDec); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_DEC - - Inst_MUBUF__BUFFER_ATOMIC_DEC::~Inst_MUBUF__BUFFER_ATOMIC_DEC() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 - // (unsigned compare); RETURN_DATA = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_swap_x2") - { - setFlag(AtomicExch); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 - - Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap_x2") - { - setFlag(AtomicCAS); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 - - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 - ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // src = DATA[0:1]; - // cmp = DATA[2:3]; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_add_x2") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 - - Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] += DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_sub_x2") - { - setFlag(AtomicSub); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 - - Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_smin_x2") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 - - Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_umin_x2") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 - - Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_smax_x2") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 - - Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_umax_x2") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 - - Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_AND_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_AND_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_and_x2") - { - setFlag(AtomicAnd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_AND_X2 - - Inst_MUBUF__BUFFER_ATOMIC_AND_X2::~Inst_MUBUF__BUFFER_ATOMIC_AND_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_OR_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_OR_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_or_x2") - { - setFlag(AtomicOr); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - } // Inst_MUBUF__BUFFER_ATOMIC_OR_X2 - - Inst_MUBUF__BUFFER_ATOMIC_OR_X2::~Inst_MUBUF__BUFFER_ATOMIC_OR_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_xor_x2") - { - setFlag(AtomicXor); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 - - Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_INC_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_INC_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_inc_x2") - { - setFlag(AtomicInc); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_INC_X2 - - Inst_MUBUF__BUFFER_ATOMIC_INC_X2::~Inst_MUBUF__BUFFER_ATOMIC_INC_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_MUBUF__BUFFER_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 class methods --- - - Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 - ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF *iFmt) - : Inst_MUBUF(iFmt, "buffer_atomic_dec_x2") - { - setFlag(AtomicDec); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - setFlag(GlobalSegment); - } // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 - - Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2() - { - } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 - // (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_X class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_X(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_x") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_X - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X - - // --- description from .arch file --- - // Typed buffer load 1 dword with format conversion. - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_xy") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY - - // --- description from .arch file --- - // Typed buffer load 2 dwords with format conversion. 
- void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_xyz") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ - - // --- description from .arch file --- - // Typed buffer load 3 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_xyzw") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW - ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW - - // --- description from .arch file --- - // Typed buffer load 4 dwords with format conversion. 
- void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_X class methods --- - - Inst_MTBUF__TBUFFER_STORE_FORMAT_X - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_X(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_x") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_X - - Inst_MTBUF__TBUFFER_STORE_FORMAT_X::~Inst_MTBUF__TBUFFER_STORE_FORMAT_X() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_X - - // --- description from .arch file --- - // Typed buffer store 1 dword with format conversion. - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_XY class methods --- - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XY(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_xy") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XY - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY - - // --- description from .arch file --- - // Typed buffer store 2 dwords with format conversion. 
- void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ class methods --- - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_xyz") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ - ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ - - // --- description from .arch file --- - // Typed buffer store 3 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW class methods --- - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_xyzw") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW - - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW - ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW - - // --- description from .arch file --- - // Typed buffer store 4 dwords with format conversion. 
- void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_x") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X:: - ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X - - // --- description from .arch file --- - // Typed buffer load 1 dword with format conversion. - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xy") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY - ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY - - // --- description from .arch file --- - // Typed buffer load 2 dwords with format conversion. 
- void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ( - InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyz") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ - ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ - - // --- description from .arch file --- - // Typed buffer load 3 dwords with format conversion. - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW class methods --- - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW - ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW( - InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyzw") - { - setFlag(MemoryRef); - setFlag(Load); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW - - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW - ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW() - { - } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW - - // --- description from .arch file --- - // Typed buffer load 4 dwords with format conversion. 
- void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X class methods --- - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_x") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X - ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X - - // --- description from .arch file --- - // Typed buffer store 1 dword with format conversion. - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::initiateAcc( - GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::completeAcc( - GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY class methods --- - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY - ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(InFmt_MTBUF *iFmt) - : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xy") - { - setFlag(MemoryRef); - setFlag(Store); - setFlag(GlobalSegment); - } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY - - Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY - ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY() - { - } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY - - // --- description from .arch file --- - // Typed buffer store 2 dwords with format conversion. 
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ class methods ---

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ

    // --- description from .arch file ---
    // Typed buffer store 3 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW class methods ---

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW

    // --- description from .arch file ---
    // Typed buffer store 4 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::execute(
        GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MIMG__IMAGE_LOAD class methods ---

    Inst_MIMG__IMAGE_LOAD::Inst_MIMG__IMAGE_LOAD(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD

    Inst_MIMG__IMAGE_LOAD::~Inst_MIMG__IMAGE_LOAD()
    {
    } // ~Inst_MIMG__IMAGE_LOAD

    // --- description from .arch file ---
    // Image memory load with format conversion specified in T#. No sampler.
    void
    Inst_MIMG__IMAGE_LOAD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MIMG__IMAGE_LOAD_MIP class methods ---

    Inst_MIMG__IMAGE_LOAD_MIP::Inst_MIMG__IMAGE_LOAD_MIP(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_mip")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_MIP

    Inst_MIMG__IMAGE_LOAD_MIP::~Inst_MIMG__IMAGE_LOAD_MIP()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_MIP

    // --- description from .arch file ---
    // Image memory load with user-supplied mip level. No sampler.
    void
    Inst_MIMG__IMAGE_LOAD_MIP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MIMG__IMAGE_LOAD_PCK class methods ---

    Inst_MIMG__IMAGE_LOAD_PCK::Inst_MIMG__IMAGE_LOAD_PCK(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_pck")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_PCK

    Inst_MIMG__IMAGE_LOAD_PCK::~Inst_MIMG__IMAGE_LOAD_PCK()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_PCK

    // --- description from .arch file ---
    // Image memory load with no format conversion. No sampler.
    void
    Inst_MIMG__IMAGE_LOAD_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MIMG__IMAGE_LOAD_PCK_SGN class methods ---

    Inst_MIMG__IMAGE_LOAD_PCK_SGN::Inst_MIMG__IMAGE_LOAD_PCK_SGN(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_pck_sgn")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_PCK_SGN

    Inst_MIMG__IMAGE_LOAD_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_PCK_SGN()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_PCK_SGN

    // --- description from .arch file ---
    // Image memory load with no format conversion and sign extension.
    // No sampler.
    void
    Inst_MIMG__IMAGE_LOAD_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MIMG__IMAGE_LOAD_MIP_PCK class methods ---

    Inst_MIMG__IMAGE_LOAD_MIP_PCK::Inst_MIMG__IMAGE_LOAD_MIP_PCK(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_mip_pck")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_MIP_PCK

    Inst_MIMG__IMAGE_LOAD_MIP_PCK::~Inst_MIMG__IMAGE_LOAD_MIP_PCK()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK

    // --- description from .arch file ---
    // Image memory load with user-supplied mip level, no format conversion.
    // No sampler.
    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN class methods ---

    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_mip_pck_sgn")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN

    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN

    // --- description from .arch file ---
    // Image memory load with user-supplied mip level, no format conversion
    // and with sign extension. No sampler.
    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MIMG__IMAGE_STORE class methods ---

    Inst_MIMG__IMAGE_STORE::Inst_MIMG__IMAGE_STORE(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE

    Inst_MIMG__IMAGE_STORE::~Inst_MIMG__IMAGE_STORE()
    {
    } // ~Inst_MIMG__IMAGE_STORE

    // --- description from .arch file ---
    // Image memory store with format conversion specified in T#. No sampler.
    void
    Inst_MIMG__IMAGE_STORE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MIMG__IMAGE_STORE_MIP class methods ---

    Inst_MIMG__IMAGE_STORE_MIP::Inst_MIMG__IMAGE_STORE_MIP(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store_mip")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE_MIP

    Inst_MIMG__IMAGE_STORE_MIP::~Inst_MIMG__IMAGE_STORE_MIP()
    {
    } // ~Inst_MIMG__IMAGE_STORE_MIP

    // --- description from .arch file ---
    // Image memory store with format conversion specified in T# to user
    // specified mip level. No sampler.
    void
    Inst_MIMG__IMAGE_STORE_MIP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MIMG__IMAGE_STORE_PCK class methods ---

    Inst_MIMG__IMAGE_STORE_PCK::Inst_MIMG__IMAGE_STORE_PCK(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store_pck")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE_PCK

    Inst_MIMG__IMAGE_STORE_PCK::~Inst_MIMG__IMAGE_STORE_PCK()
    {
    } // ~Inst_MIMG__IMAGE_STORE_PCK

    // --- description from .arch file ---
    // Image memory store of packed data without format conversion. No sampler.
    void
    Inst_MIMG__IMAGE_STORE_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MIMG__IMAGE_STORE_MIP_PCK class methods ---

    Inst_MIMG__IMAGE_STORE_MIP_PCK::Inst_MIMG__IMAGE_STORE_MIP_PCK(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store_mip_pck")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE_MIP_PCK

    Inst_MIMG__IMAGE_STORE_MIP_PCK::~Inst_MIMG__IMAGE_STORE_MIP_PCK()
    {
    } // ~Inst_MIMG__IMAGE_STORE_MIP_PCK

    // --- description from .arch file ---
    // Image memory store of packed data without format conversion to
    // user-supplied mip level. No sampler.
    void
    Inst_MIMG__IMAGE_STORE_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MIMG__IMAGE_GET_RESINFO class methods ---

    Inst_MIMG__IMAGE_GET_RESINFO::Inst_MIMG__IMAGE_GET_RESINFO(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_get_resinfo")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GET_RESINFO

    Inst_MIMG__IMAGE_GET_RESINFO::~Inst_MIMG__IMAGE_GET_RESINFO()
    {
    } // ~Inst_MIMG__IMAGE_GET_RESINFO

    // --- description from .arch file ---
    // return resource info for a given mip level specified in the address
    // vgpr. No sampler. Returns 4 integer values into VGPRs 3-0:
    // {num_mip_levels, depth, height, width}.
    void
    Inst_MIMG__IMAGE_GET_RESINFO::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_ATOMIC_SWAP class methods ---

    Inst_MIMG__IMAGE_ATOMIC_SWAP::Inst_MIMG__IMAGE_ATOMIC_SWAP(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_swap")
    {
        setFlag(AtomicExch);
        // The GLC bit selects whether this atomic returns a value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SWAP

    Inst_MIMG__IMAGE_ATOMIC_SWAP::~Inst_MIMG__IMAGE_ATOMIC_SWAP()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SWAP

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_ATOMIC_CMPSWAP class methods ---

    Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::Inst_MIMG__IMAGE_ATOMIC_CMPSWAP(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_cmpswap")
    {
        setFlag(AtomicCAS);
        // The GLC bit selects whether this atomic returns a value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_CMPSWAP

    Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // src = DATA[0];
    // cmp = DATA[1];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_ATOMIC_ADD class methods ---

    Inst_MIMG__IMAGE_ATOMIC_ADD::Inst_MIMG__IMAGE_ATOMIC_ADD(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_add")
    {
        setFlag(AtomicAdd);
        // The GLC bit selects whether this atomic returns a value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_ADD

    Inst_MIMG__IMAGE_ATOMIC_ADD::~Inst_MIMG__IMAGE_ATOMIC_ADD()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_ADD

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_ATOMIC_SUB class methods ---

    Inst_MIMG__IMAGE_ATOMIC_SUB::Inst_MIMG__IMAGE_ATOMIC_SUB(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_sub")
    {
        setFlag(AtomicSub);
        // The GLC bit selects whether this atomic returns a value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SUB

    Inst_MIMG__IMAGE_ATOMIC_SUB::~Inst_MIMG__IMAGE_ATOMIC_SUB()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SUB

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_ATOMIC_SMIN class methods ---

    Inst_MIMG__IMAGE_ATOMIC_SMIN::Inst_MIMG__IMAGE_ATOMIC_SMIN(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_smin")
    {
        setFlag(AtomicMin);
        // The GLC bit selects whether this atomic returns a value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SMIN

    Inst_MIMG__IMAGE_ATOMIC_SMIN::~Inst_MIMG__IMAGE_ATOMIC_SMIN()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SMIN

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_ATOMIC_UMIN class methods ---

    Inst_MIMG__IMAGE_ATOMIC_UMIN::Inst_MIMG__IMAGE_ATOMIC_UMIN(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_umin")
    {
        setFlag(AtomicMin);
        // The GLC bit selects whether this atomic returns a value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_UMIN

    Inst_MIMG__IMAGE_ATOMIC_UMIN::~Inst_MIMG__IMAGE_ATOMIC_UMIN()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_UMIN

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_ATOMIC_SMAX class methods ---

    Inst_MIMG__IMAGE_ATOMIC_SMAX::Inst_MIMG__IMAGE_ATOMIC_SMAX(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_smax")
    {
        setFlag(AtomicMax);
        // The GLC bit selects whether this atomic returns a value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SMAX

    Inst_MIMG__IMAGE_ATOMIC_SMAX::~Inst_MIMG__IMAGE_ATOMIC_SMAX()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SMAX

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_ATOMIC_UMAX class methods ---

    Inst_MIMG__IMAGE_ATOMIC_UMAX::Inst_MIMG__IMAGE_ATOMIC_UMAX(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_umax")
    {
        setFlag(AtomicMax);
        // The GLC bit selects whether this atomic returns a value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_UMAX

    Inst_MIMG__IMAGE_ATOMIC_UMAX::~Inst_MIMG__IMAGE_ATOMIC_UMAX()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_UMAX

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_ATOMIC_AND class methods ---

    Inst_MIMG__IMAGE_ATOMIC_AND::Inst_MIMG__IMAGE_ATOMIC_AND(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_and")
    {
        setFlag(AtomicAnd);
        // The GLC bit selects whether this atomic returns a value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_AND

    Inst_MIMG__IMAGE_ATOMIC_AND::~Inst_MIMG__IMAGE_ATOMIC_AND()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_AND

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_ATOMIC_OR class methods ---

    Inst_MIMG__IMAGE_ATOMIC_OR::Inst_MIMG__IMAGE_ATOMIC_OR(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_or")
    {
        setFlag(AtomicOr);
        // The GLC bit selects whether this atomic returns a value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_OR

    Inst_MIMG__IMAGE_ATOMIC_OR::~Inst_MIMG__IMAGE_ATOMIC_OR()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_OR

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_ATOMIC_XOR class methods ---

    Inst_MIMG__IMAGE_ATOMIC_XOR::Inst_MIMG__IMAGE_ATOMIC_XOR(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_xor")
    {
        setFlag(AtomicXor);
        // The GLC bit selects whether this atomic returns a value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_XOR

    Inst_MIMG__IMAGE_ATOMIC_XOR::~Inst_MIMG__IMAGE_ATOMIC_XOR()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_XOR

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_ATOMIC_INC class methods ---

    Inst_MIMG__IMAGE_ATOMIC_INC::Inst_MIMG__IMAGE_ATOMIC_INC(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_inc")
    {
        setFlag(AtomicInc);
        // The GLC bit selects whether this atomic returns a value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_INC

    Inst_MIMG__IMAGE_ATOMIC_INC::~Inst_MIMG__IMAGE_ATOMIC_INC()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_INC

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_ATOMIC_DEC class methods ---

    Inst_MIMG__IMAGE_ATOMIC_DEC::Inst_MIMG__IMAGE_ATOMIC_DEC(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_dec")
    {
        setFlag(AtomicDec);
        // The GLC bit selects whether this atomic returns a value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_DEC

    Inst_MIMG__IMAGE_ATOMIC_DEC::~Inst_MIMG__IMAGE_ATOMIC_DEC()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_DEC

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE class methods ---

    // NOTE(review): unlike the other image_sample variants below, this
    // constructor sets no flags (not even GlobalSegment) — confirm this
    // is intentional.
    Inst_MIMG__IMAGE_SAMPLE::Inst_MIMG__IMAGE_SAMPLE(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample")
    {
    } // Inst_MIMG__IMAGE_SAMPLE

    Inst_MIMG__IMAGE_SAMPLE::~Inst_MIMG__IMAGE_SAMPLE()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE

    // --- description from .arch file ---
    // sample texture map.
    void
    Inst_MIMG__IMAGE_SAMPLE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_CL class methods ---

    Inst_MIMG__IMAGE_SAMPLE_CL::Inst_MIMG__IMAGE_SAMPLE_CL(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CL

    Inst_MIMG__IMAGE_SAMPLE_CL::~Inst_MIMG__IMAGE_SAMPLE_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CL

    // --- description from .arch file ---
    // sample texture map, with LOD clamp specified in shader.
    void
    Inst_MIMG__IMAGE_SAMPLE_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_D class methods ---

    Inst_MIMG__IMAGE_SAMPLE_D::Inst_MIMG__IMAGE_SAMPLE_D(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D

    Inst_MIMG__IMAGE_SAMPLE_D::~Inst_MIMG__IMAGE_SAMPLE_D()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D

    // --- description from .arch file ---
    // sample texture map, with user derivatives
    void
    Inst_MIMG__IMAGE_SAMPLE_D::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_D_CL class methods ---

    Inst_MIMG__IMAGE_SAMPLE_D_CL::Inst_MIMG__IMAGE_SAMPLE_D_CL(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D_CL

    Inst_MIMG__IMAGE_SAMPLE_D_CL::~Inst_MIMG__IMAGE_SAMPLE_D_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL

    // --- description from .arch file ---
    // sample texture map, with LOD clamp specified in shader, with user
    // derivatives.
    void
    Inst_MIMG__IMAGE_SAMPLE_D_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_L class methods ---

    Inst_MIMG__IMAGE_SAMPLE_L::Inst_MIMG__IMAGE_SAMPLE_L(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_L

    Inst_MIMG__IMAGE_SAMPLE_L::~Inst_MIMG__IMAGE_SAMPLE_L()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_L

    // --- description from .arch file ---
    // sample texture map, with user LOD.
    void
    Inst_MIMG__IMAGE_SAMPLE_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_B class methods ---

    Inst_MIMG__IMAGE_SAMPLE_B::Inst_MIMG__IMAGE_SAMPLE_B(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B

    Inst_MIMG__IMAGE_SAMPLE_B::~Inst_MIMG__IMAGE_SAMPLE_B()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B

    // --- description from .arch file ---
    // sample texture map, with lod bias.
    void
    Inst_MIMG__IMAGE_SAMPLE_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_B_CL class methods ---

    Inst_MIMG__IMAGE_SAMPLE_B_CL::Inst_MIMG__IMAGE_SAMPLE_B_CL(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B_CL

    Inst_MIMG__IMAGE_SAMPLE_B_CL::~Inst_MIMG__IMAGE_SAMPLE_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL

    // --- description from .arch file ---
    // sample texture map, with LOD clamp specified in shader, with lod bias.
    void
    Inst_MIMG__IMAGE_SAMPLE_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_LZ class methods ---

    Inst_MIMG__IMAGE_SAMPLE_LZ::Inst_MIMG__IMAGE_SAMPLE_LZ(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_LZ

    Inst_MIMG__IMAGE_SAMPLE_LZ::~Inst_MIMG__IMAGE_SAMPLE_LZ()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_LZ

    // --- description from .arch file ---
    // sample texture map, from level 0.
    void
    Inst_MIMG__IMAGE_SAMPLE_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_C class methods ---

    Inst_MIMG__IMAGE_SAMPLE_C::Inst_MIMG__IMAGE_SAMPLE_C(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C

    Inst_MIMG__IMAGE_SAMPLE_C::~Inst_MIMG__IMAGE_SAMPLE_C()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C

    // --- description from .arch file ---
    // sample texture map, with PCF.
    void
    Inst_MIMG__IMAGE_SAMPLE_C::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_C_CL class methods ---

    Inst_MIMG__IMAGE_SAMPLE_C_CL::Inst_MIMG__IMAGE_SAMPLE_C_CL(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CL

    Inst_MIMG__IMAGE_SAMPLE_C_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL

    // --- description from .arch file ---
    // SAMPLE_C, with LOD clamp specified in shader.
    void
    Inst_MIMG__IMAGE_SAMPLE_C_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_C_D class methods ---

    Inst_MIMG__IMAGE_SAMPLE_C_D::Inst_MIMG__IMAGE_SAMPLE_C_D(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D

    Inst_MIMG__IMAGE_SAMPLE_C_D::~Inst_MIMG__IMAGE_SAMPLE_C_D()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D

    // --- description from .arch file ---
    // SAMPLE_C, with user derivatives.
    void
    Inst_MIMG__IMAGE_SAMPLE_C_D::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_C_D_CL class methods ---

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL::Inst_MIMG__IMAGE_SAMPLE_C_D_CL(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL

    // --- description from .arch file ---
    // SAMPLE_C, with LOD clamp specified in shader, with user derivatives.
    void
    Inst_MIMG__IMAGE_SAMPLE_C_D_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_C_L class methods ---

    Inst_MIMG__IMAGE_SAMPLE_C_L::Inst_MIMG__IMAGE_SAMPLE_C_L(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_L

    Inst_MIMG__IMAGE_SAMPLE_C_L::~Inst_MIMG__IMAGE_SAMPLE_C_L()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_L

    // --- description from .arch file ---
    // SAMPLE_C, with user LOD.
    void
    Inst_MIMG__IMAGE_SAMPLE_C_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_C_B class methods ---

    Inst_MIMG__IMAGE_SAMPLE_C_B::Inst_MIMG__IMAGE_SAMPLE_C_B(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B

    Inst_MIMG__IMAGE_SAMPLE_C_B::~Inst_MIMG__IMAGE_SAMPLE_C_B()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B

    // --- description from .arch file ---
    // SAMPLE_C, with lod bias.
    void
    Inst_MIMG__IMAGE_SAMPLE_C_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_C_B_CL class methods ---

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL::Inst_MIMG__IMAGE_SAMPLE_C_B_CL(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL

    // --- description from .arch file ---
    // SAMPLE_C, with LOD clamp specified in shader, with lod bias.
    void
    Inst_MIMG__IMAGE_SAMPLE_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_C_LZ class methods ---

    Inst_MIMG__IMAGE_SAMPLE_C_LZ::Inst_MIMG__IMAGE_SAMPLE_C_LZ(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_LZ

    Inst_MIMG__IMAGE_SAMPLE_C_LZ::~Inst_MIMG__IMAGE_SAMPLE_C_LZ()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ

    // --- description from .arch file ---
    // SAMPLE_C, from level 0.
    void
    Inst_MIMG__IMAGE_SAMPLE_C_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_O class methods ---

    Inst_MIMG__IMAGE_SAMPLE_O::Inst_MIMG__IMAGE_SAMPLE_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_O

    Inst_MIMG__IMAGE_SAMPLE_O::~Inst_MIMG__IMAGE_SAMPLE_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_O

    // --- description from .arch file ---
    // sample texture map, with user offsets.
    void
    Inst_MIMG__IMAGE_SAMPLE_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_CL_O class methods ---

    Inst_MIMG__IMAGE_SAMPLE_CL_O::Inst_MIMG__IMAGE_SAMPLE_CL_O(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CL_O

    Inst_MIMG__IMAGE_SAMPLE_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CL_O

    // --- description from .arch file ---
    // SAMPLE_O with LOD clamp specified in shader.
    void
    Inst_MIMG__IMAGE_SAMPLE_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_D_O class methods ---

    Inst_MIMG__IMAGE_SAMPLE_D_O::Inst_MIMG__IMAGE_SAMPLE_D_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D_O

    Inst_MIMG__IMAGE_SAMPLE_D_O::~Inst_MIMG__IMAGE_SAMPLE_D_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D_O

    // --- description from .arch file ---
    // SAMPLE_O, with user derivatives.
    void
    Inst_MIMG__IMAGE_SAMPLE_D_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_D_CL_O class methods ---

    Inst_MIMG__IMAGE_SAMPLE_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_D_CL_O(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D_CL_O

    Inst_MIMG__IMAGE_SAMPLE_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_D_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL_O

    // --- description from .arch file ---
    // SAMPLE_O, with LOD clamp specified in shader, with user derivatives.
    void
    Inst_MIMG__IMAGE_SAMPLE_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_L_O class methods ---

    Inst_MIMG__IMAGE_SAMPLE_L_O::Inst_MIMG__IMAGE_SAMPLE_L_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_l_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_L_O

    Inst_MIMG__IMAGE_SAMPLE_L_O::~Inst_MIMG__IMAGE_SAMPLE_L_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_L_O

    // --- description from .arch file ---
    // SAMPLE_O, with user LOD.
    void
    Inst_MIMG__IMAGE_SAMPLE_L_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MIMG__IMAGE_SAMPLE_B_O class methods ---

    Inst_MIMG__IMAGE_SAMPLE_B_O::Inst_MIMG__IMAGE_SAMPLE_B_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B_O

    Inst_MIMG__IMAGE_SAMPLE_B_O::~Inst_MIMG__IMAGE_SAMPLE_B_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B_O

    // --- description from .arch file ---
    // SAMPLE_O, with lod bias.
- void - Inst_MIMG__IMAGE_SAMPLE_B_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_B_CL_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_B_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_b_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_B_CL_O - - Inst_MIMG__IMAGE_SAMPLE_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_B_CL_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL_O - - // --- description from .arch file --- - // SAMPLE_O, with LOD clamp specified in shader, with lod bias. - void - Inst_MIMG__IMAGE_SAMPLE_B_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_LZ_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_LZ_O::Inst_MIMG__IMAGE_SAMPLE_LZ_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_lz_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_LZ_O - - Inst_MIMG__IMAGE_SAMPLE_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_LZ_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_LZ_O - - // --- description from .arch file --- - // SAMPLE_O, from level 0. - void - Inst_MIMG__IMAGE_SAMPLE_LZ_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_O::Inst_MIMG__IMAGE_SAMPLE_C_O(InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_O - - Inst_MIMG__IMAGE_SAMPLE_C_O::~Inst_MIMG__IMAGE_SAMPLE_C_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_O - - // --- description from .arch file --- - // SAMPLE_C with user specified offsets. 
- void - Inst_MIMG__IMAGE_SAMPLE_C_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_CL_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_CL_O - - Inst_MIMG__IMAGE_SAMPLE_C_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CL_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL_O - - // --- description from .arch file --- - // SAMPLE_C_O, with LOD clamp specified in shader. - void - Inst_MIMG__IMAGE_SAMPLE_C_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_D_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_D_O::Inst_MIMG__IMAGE_SAMPLE_C_D_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_d_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_D_O - - Inst_MIMG__IMAGE_SAMPLE_C_D_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_O - - // --- description from .arch file --- - // SAMPLE_C_O, with user derivatives. - void - Inst_MIMG__IMAGE_SAMPLE_C_D_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_d_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O - - Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O - - // --- description from .arch file --- - // SAMPLE_C_O, with LOD clamp specified in shader, with user derivatives. 
- void - Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_L_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_L_O::Inst_MIMG__IMAGE_SAMPLE_C_L_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_l_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_L_O - - Inst_MIMG__IMAGE_SAMPLE_C_L_O::~Inst_MIMG__IMAGE_SAMPLE_C_L_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_L_O - - // --- description from .arch file --- - // SAMPLE_C_O, with user LOD. - void - Inst_MIMG__IMAGE_SAMPLE_C_L_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_B_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_B_O::Inst_MIMG__IMAGE_SAMPLE_C_B_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_b_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_B_O - - Inst_MIMG__IMAGE_SAMPLE_C_B_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_O - - // --- description from .arch file --- - // SAMPLE_C_O, with lod bias. - void - Inst_MIMG__IMAGE_SAMPLE_C_B_O::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O class methods --- - - Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O( - InFmt_MIMG *iFmt) - : Inst_MIMG(iFmt, "image_sample_c_b_cl_o") - { - setFlag(GlobalSegment); - } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O - - Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O() - { - } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O - - // --- description from .arch file --- - // SAMPLE_C_O, with LOD clamp specified in shader, with lod bias. 
// NOTE(review): the GATHER4 family below is decode-only as well — each
// execute() calls panicUnimplemented(); only the global-segment flag is set.
void
Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_LZ_O class methods ---

Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::Inst_MIMG__IMAGE_SAMPLE_C_LZ_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_sample_c_lz_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_LZ_O

Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O

// --- description from .arch file ---
// SAMPLE_C_O, from level 0.
void
Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4 class methods ---

Inst_MIMG__IMAGE_GATHER4::Inst_MIMG__IMAGE_GATHER4(InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4

Inst_MIMG__IMAGE_GATHER4::~Inst_MIMG__IMAGE_GATHER4()
{
} // ~Inst_MIMG__IMAGE_GATHER4

// --- description from .arch file ---
// gather 4 single component elements (2x2).
void
Inst_MIMG__IMAGE_GATHER4::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_CL class methods ---

Inst_MIMG__IMAGE_GATHER4_CL::Inst_MIMG__IMAGE_GATHER4_CL(InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_cl")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_CL

Inst_MIMG__IMAGE_GATHER4_CL::~Inst_MIMG__IMAGE_GATHER4_CL()
{
} // ~Inst_MIMG__IMAGE_GATHER4_CL

// --- description from .arch file ---
// gather 4 single component elements (2x2) with user LOD clamp.
void
Inst_MIMG__IMAGE_GATHER4_CL::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_L class methods ---

Inst_MIMG__IMAGE_GATHER4_L::Inst_MIMG__IMAGE_GATHER4_L(InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_l")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_L

Inst_MIMG__IMAGE_GATHER4_L::~Inst_MIMG__IMAGE_GATHER4_L()
{
} // ~Inst_MIMG__IMAGE_GATHER4_L

// --- description from .arch file ---
// gather 4 single component elements (2x2) with user LOD.
void
Inst_MIMG__IMAGE_GATHER4_L::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_B class methods ---

Inst_MIMG__IMAGE_GATHER4_B::Inst_MIMG__IMAGE_GATHER4_B(InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_b")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_B

Inst_MIMG__IMAGE_GATHER4_B::~Inst_MIMG__IMAGE_GATHER4_B()
{
} // ~Inst_MIMG__IMAGE_GATHER4_B

// --- description from .arch file ---
// gather 4 single component elements (2x2) with user bias.
void
Inst_MIMG__IMAGE_GATHER4_B::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_B_CL class methods ---

Inst_MIMG__IMAGE_GATHER4_B_CL::Inst_MIMG__IMAGE_GATHER4_B_CL(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_b_cl")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_B_CL

Inst_MIMG__IMAGE_GATHER4_B_CL::~Inst_MIMG__IMAGE_GATHER4_B_CL()
{
} // ~Inst_MIMG__IMAGE_GATHER4_B_CL

// --- description from .arch file ---
// gather 4 single component elements (2x2) with user bias and clamp.
void
Inst_MIMG__IMAGE_GATHER4_B_CL::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_LZ class methods ---

Inst_MIMG__IMAGE_GATHER4_LZ::Inst_MIMG__IMAGE_GATHER4_LZ(InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_lz")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_LZ

Inst_MIMG__IMAGE_GATHER4_LZ::~Inst_MIMG__IMAGE_GATHER4_LZ()
{
} // ~Inst_MIMG__IMAGE_GATHER4_LZ

// --- description from .arch file ---
// gather 4 single component elements (2x2) at level 0.
void
Inst_MIMG__IMAGE_GATHER4_LZ::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C class methods ---

Inst_MIMG__IMAGE_GATHER4_C::Inst_MIMG__IMAGE_GATHER4_C(InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_c")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C

Inst_MIMG__IMAGE_GATHER4_C::~Inst_MIMG__IMAGE_GATHER4_C()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C

// --- description from .arch file ---
// gather 4 single component elements (2x2) with PCF.
void
Inst_MIMG__IMAGE_GATHER4_C::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_CL class methods ---

Inst_MIMG__IMAGE_GATHER4_C_CL::Inst_MIMG__IMAGE_GATHER4_C_CL(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_c_cl")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_CL

Inst_MIMG__IMAGE_GATHER4_C_CL::~Inst_MIMG__IMAGE_GATHER4_C_CL()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_CL

// --- description from .arch file ---
// gather 4 single component elements (2x2) with user LOD clamp and PCF.
void
Inst_MIMG__IMAGE_GATHER4_C_CL::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_L class methods ---

Inst_MIMG__IMAGE_GATHER4_C_L::Inst_MIMG__IMAGE_GATHER4_C_L(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_c_l")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_L

Inst_MIMG__IMAGE_GATHER4_C_L::~Inst_MIMG__IMAGE_GATHER4_C_L()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_L

// --- description from .arch file ---
// gather 4 single component elements (2x2) with user LOD and PCF.
void
Inst_MIMG__IMAGE_GATHER4_C_L::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_B class methods ---

Inst_MIMG__IMAGE_GATHER4_C_B::Inst_MIMG__IMAGE_GATHER4_C_B(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_c_b")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_B

Inst_MIMG__IMAGE_GATHER4_C_B::~Inst_MIMG__IMAGE_GATHER4_C_B()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_B

// --- description from .arch file ---
// gather 4 single component elements (2x2) with user bias and PCF.
void
Inst_MIMG__IMAGE_GATHER4_C_B::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_B_CL class methods ---

Inst_MIMG__IMAGE_GATHER4_C_B_CL::Inst_MIMG__IMAGE_GATHER4_C_B_CL(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_c_b_cl")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_B_CL

Inst_MIMG__IMAGE_GATHER4_C_B_CL::~Inst_MIMG__IMAGE_GATHER4_C_B_CL()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL

// --- description from .arch file ---
// gather 4 single component elements (2x2) with user bias, clamp and PCF.
void
Inst_MIMG__IMAGE_GATHER4_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_LZ class methods ---

Inst_MIMG__IMAGE_GATHER4_C_LZ::Inst_MIMG__IMAGE_GATHER4_C_LZ(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_c_lz")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_LZ

Inst_MIMG__IMAGE_GATHER4_C_LZ::~Inst_MIMG__IMAGE_GATHER4_C_LZ()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_LZ

// --- description from .arch file ---
// gather 4 single component elements (2x2) at level 0, with PCF.
void
Inst_MIMG__IMAGE_GATHER4_C_LZ::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_O class methods ---

Inst_MIMG__IMAGE_GATHER4_O::Inst_MIMG__IMAGE_GATHER4_O(InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_O

Inst_MIMG__IMAGE_GATHER4_O::~Inst_MIMG__IMAGE_GATHER4_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_O

// --- description from .arch file ---
// GATHER4, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_CL_O class methods ---

Inst_MIMG__IMAGE_GATHER4_CL_O::Inst_MIMG__IMAGE_GATHER4_CL_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_cl_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_CL_O

Inst_MIMG__IMAGE_GATHER4_CL_O::~Inst_MIMG__IMAGE_GATHER4_CL_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_CL_O

// --- description from .arch file ---
// GATHER4_CL, with user offsets.
// NOTE(review): GATHER4 *_O (user-offset) variants — decode-only stubs;
// execute() aborts via panicUnimplemented().
void
Inst_MIMG__IMAGE_GATHER4_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_L_O class methods ---

Inst_MIMG__IMAGE_GATHER4_L_O::Inst_MIMG__IMAGE_GATHER4_L_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_l_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_L_O

Inst_MIMG__IMAGE_GATHER4_L_O::~Inst_MIMG__IMAGE_GATHER4_L_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_L_O

// --- description from .arch file ---
// GATHER4_L, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_L_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_B_O class methods ---

Inst_MIMG__IMAGE_GATHER4_B_O::Inst_MIMG__IMAGE_GATHER4_B_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_b_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_B_O

Inst_MIMG__IMAGE_GATHER4_B_O::~Inst_MIMG__IMAGE_GATHER4_B_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_B_O

// --- description from .arch file ---
// GATHER4_B, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_B_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_B_CL_O class methods ---

Inst_MIMG__IMAGE_GATHER4_B_CL_O::Inst_MIMG__IMAGE_GATHER4_B_CL_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_b_cl_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_B_CL_O

Inst_MIMG__IMAGE_GATHER4_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_B_CL_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_B_CL_O

// --- description from .arch file ---
// GATHER4_B_CL, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_LZ_O class methods ---

Inst_MIMG__IMAGE_GATHER4_LZ_O::Inst_MIMG__IMAGE_GATHER4_LZ_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_lz_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_LZ_O

Inst_MIMG__IMAGE_GATHER4_LZ_O::~Inst_MIMG__IMAGE_GATHER4_LZ_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_LZ_O

// --- description from .arch file ---
// GATHER4_LZ, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_LZ_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_O class methods ---

Inst_MIMG__IMAGE_GATHER4_C_O::Inst_MIMG__IMAGE_GATHER4_C_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_c_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_O

Inst_MIMG__IMAGE_GATHER4_C_O::~Inst_MIMG__IMAGE_GATHER4_C_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_O

// --- description from .arch file ---
// GATHER4_C, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_C_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_CL_O class methods ---

Inst_MIMG__IMAGE_GATHER4_C_CL_O::Inst_MIMG__IMAGE_GATHER4_C_CL_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_c_cl_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_CL_O

Inst_MIMG__IMAGE_GATHER4_C_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_CL_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_CL_O

// --- description from .arch file ---
// GATHER4_C_CL, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_L_O class methods ---

Inst_MIMG__IMAGE_GATHER4_C_L_O::Inst_MIMG__IMAGE_GATHER4_C_L_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_c_l_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_L_O

Inst_MIMG__IMAGE_GATHER4_C_L_O::~Inst_MIMG__IMAGE_GATHER4_C_L_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_L_O

// --- description from .arch file ---
// GATHER4_C_L, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_C_L_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_B_O class methods ---

Inst_MIMG__IMAGE_GATHER4_C_B_O::Inst_MIMG__IMAGE_GATHER4_C_B_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_c_b_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_B_O

Inst_MIMG__IMAGE_GATHER4_C_B_O::~Inst_MIMG__IMAGE_GATHER4_C_B_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_B_O

// --- description from .arch file ---
// GATHER4_B, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_C_B_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_B_CL_O class methods ---

Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::Inst_MIMG__IMAGE_GATHER4_C_B_CL_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_c_b_cl_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_B_CL_O

Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O

// --- description from .arch file ---
// GATHER4_B_CL, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GATHER4_C_LZ_O class methods ---

Inst_MIMG__IMAGE_GATHER4_C_LZ_O::Inst_MIMG__IMAGE_GATHER4_C_LZ_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_gather4_c_lz_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GATHER4_C_LZ_O

Inst_MIMG__IMAGE_GATHER4_C_LZ_O::~Inst_MIMG__IMAGE_GATHER4_C_LZ_O()
{
} // ~Inst_MIMG__IMAGE_GATHER4_C_LZ_O

// --- description from .arch file ---
// GATHER4_C_LZ, with user offsets.
void
Inst_MIMG__IMAGE_GATHER4_C_LZ_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_GET_LOD class methods ---

Inst_MIMG__IMAGE_GET_LOD::Inst_MIMG__IMAGE_GET_LOD(InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_get_lod")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_GET_LOD

Inst_MIMG__IMAGE_GET_LOD::~Inst_MIMG__IMAGE_GET_LOD()
{
} // ~Inst_MIMG__IMAGE_GET_LOD

// --- description from .arch file ---
// Return calculated LOD. Vdata gets 2 32bit integer values: { rawLOD,
// --- clampedLOD }.
// NOTE(review): GET_LOD and the SAMPLE_CD (coarse-derivative) family below
// are decode-only stubs; execute() aborts via panicUnimplemented().
void
Inst_MIMG__IMAGE_GET_LOD::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_CD class methods ---

Inst_MIMG__IMAGE_SAMPLE_CD::Inst_MIMG__IMAGE_SAMPLE_CD(InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_sample_cd")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_CD

Inst_MIMG__IMAGE_SAMPLE_CD::~Inst_MIMG__IMAGE_SAMPLE_CD()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_CD

// --- description from .arch file ---
// sample texture map, with user derivatives (LOD per quad)
void
Inst_MIMG__IMAGE_SAMPLE_CD::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_CD_CL class methods ---

Inst_MIMG__IMAGE_SAMPLE_CD_CL::Inst_MIMG__IMAGE_SAMPLE_CD_CL(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_sample_cd_cl")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_CD_CL

Inst_MIMG__IMAGE_SAMPLE_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_CD_CL()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL

// --- description from .arch file ---
// sample texture map, with LOD clamp specified in shader, with user
// --- derivatives (LOD per quad).
void
Inst_MIMG__IMAGE_SAMPLE_CD_CL::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_CD class methods ---

Inst_MIMG__IMAGE_SAMPLE_C_CD::Inst_MIMG__IMAGE_SAMPLE_C_CD(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_sample_c_cd")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_CD

Inst_MIMG__IMAGE_SAMPLE_C_CD::~Inst_MIMG__IMAGE_SAMPLE_C_CD()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_CD

// --- description from .arch file ---
// SAMPLE_C, with user derivatives (LOD per quad).
void
Inst_MIMG__IMAGE_SAMPLE_C_CD::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_CD_CL class methods ---

Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_sample_c_cd_cl")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL

Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL

// --- description from .arch file ---
// SAMPLE_C, with LOD clamp specified in shader, with user derivatives
// (LOD per quad).
void
Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_CD_O class methods ---

Inst_MIMG__IMAGE_SAMPLE_CD_O::Inst_MIMG__IMAGE_SAMPLE_CD_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_sample_cd_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_CD_O

Inst_MIMG__IMAGE_SAMPLE_CD_O::~Inst_MIMG__IMAGE_SAMPLE_CD_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_CD_O

// --- description from .arch file ---
// SAMPLE_O, with user derivatives (LOD per quad).
void
Inst_MIMG__IMAGE_SAMPLE_CD_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_CD_CL_O class methods ---

Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_CD_CL_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_sample_cd_cl_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_CD_CL_O

Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O

// --- description from .arch file ---
// SAMPLE_O, with LOD clamp specified in shader, with user derivatives
// (LOD per quad).
void
Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_CD_O class methods ---

Inst_MIMG__IMAGE_SAMPLE_C_CD_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_sample_c_cd_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_CD_O

Inst_MIMG__IMAGE_SAMPLE_C_CD_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_O

// --- description from .arch file ---
// SAMPLE_C_O, with user derivatives (LOD per quad).
void
Inst_MIMG__IMAGE_SAMPLE_C_CD_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O class methods ---

Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O(
    InFmt_MIMG *iFmt)
    : Inst_MIMG(iFmt, "image_sample_c_cd_cl_o")
{
    setFlag(GlobalSegment);
} // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O

Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O()
{
} // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O

// --- description from .arch file ---
// SAMPLE_C_O, with LOD clamp specified in shader, with user derivatives
// (LOD per quad).
void
Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_EXP__EXP class methods ---

Inst_EXP__EXP::Inst_EXP__EXP(InFmt_EXP *iFmt)
    : Inst_EXP(iFmt, "exp")
{
} // Inst_EXP__EXP

Inst_EXP__EXP::~Inst_EXP__EXP()
{
} // ~Inst_EXP__EXP

// --- description from .arch file ---
// Export through SX.
- void - Inst_EXP__EXP::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_LOAD_UBYTE class methods --- - - Inst_FLAT__FLAT_LOAD_UBYTE::Inst_FLAT__FLAT_LOAD_UBYTE(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_ubyte") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_UBYTE - - Inst_FLAT__FLAT_LOAD_UBYTE::~Inst_FLAT__FLAT_LOAD_UBYTE() - { - } // ~Inst_FLAT__FLAT_LOAD_UBYTE - - // --- description from .arch file --- - // Untyped buffer load unsigned byte (zero extend to VGPR destination). - void - Inst_FLAT__FLAT_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (VecElemU32)((reinterpret_cast( - gpuDynInst->d_data))[lane]); - } - } - vdst.write(); - } // execute - // --- Inst_FLAT__FLAT_LOAD_SBYTE class methods --- - - Inst_FLAT__FLAT_LOAD_SBYTE::Inst_FLAT__FLAT_LOAD_SBYTE(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_sbyte") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_SBYTE - - Inst_FLAT__FLAT_LOAD_SBYTE::~Inst_FLAT__FLAT_LOAD_SBYTE() - { - } // ~Inst_FLAT__FLAT_LOAD_SBYTE - - // --- description from .arch file --- - // Untyped buffer load 
signed byte (sign extend to VGPR destination). - void - Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_FLAT__FLAT_LOAD_USHORT class methods --- - - Inst_FLAT__FLAT_LOAD_USHORT::Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_ushort") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_USHORT - - Inst_FLAT__FLAT_LOAD_USHORT::~Inst_FLAT__FLAT_LOAD_USHORT() - { - } // ~Inst_FLAT__FLAT_LOAD_USHORT - - // --- description from .arch file --- - // Untyped buffer load unsigned short (zero extend to VGPR destination). - void - Inst_FLAT__FLAT_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (VecElemU32)((reinterpret_cast( - gpuDynInst->d_data))[lane]); - } - } - vdst.write(); - } // execute - - // --- Inst_FLAT__FLAT_LOAD_SSHORT class methods --- - - Inst_FLAT__FLAT_LOAD_SSHORT::Inst_FLAT__FLAT_LOAD_SSHORT(InFmt_FLAT *iFmt) - : 
Inst_FLAT(iFmt, "flat_load_sshort") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_SSHORT - - Inst_FLAT__FLAT_LOAD_SSHORT::~Inst_FLAT__FLAT_LOAD_SSHORT() - { - } // ~Inst_FLAT__FLAT_LOAD_SSHORT - - // --- description from .arch file --- - // Untyped buffer load signed short (sign extend to VGPR destination). - void - Inst_FLAT__FLAT_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - - void - Inst_FLAT__FLAT_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst) - { - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_FLAT__FLAT_LOAD_DWORD class methods --- - - Inst_FLAT__FLAT_LOAD_DWORD::Inst_FLAT__FLAT_LOAD_DWORD(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_dword") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_DWORD - - Inst_FLAT__FLAT_LOAD_DWORD::~Inst_FLAT__FLAT_LOAD_DWORD() - { - } // ~Inst_FLAT__FLAT_LOAD_DWORD - - // --- description from .arch file --- - // Untyped buffer load dword. 
- void - Inst_FLAT__FLAT_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - vdst.write(); - } // completeAcc - // --- Inst_FLAT__FLAT_LOAD_DWORDX2 class methods --- - - Inst_FLAT__FLAT_LOAD_DWORDX2::Inst_FLAT__FLAT_LOAD_DWORDX2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_dwordx2") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_DWORDX2 - - Inst_FLAT__FLAT_LOAD_DWORDX2::~Inst_FLAT__FLAT_LOAD_DWORDX2() - { - } // ~Inst_FLAT__FLAT_LOAD_DWORDX2 - - // --- description from .arch file --- - // Untyped buffer load 2 dwords. 
- void - Inst_FLAT__FLAT_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU64 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - vdst.write(); - } // completeAcc - // --- Inst_FLAT__FLAT_LOAD_DWORDX3 class methods --- - - Inst_FLAT__FLAT_LOAD_DWORDX3::Inst_FLAT__FLAT_LOAD_DWORDX3( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_dwordx3") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_DWORDX3 - - Inst_FLAT__FLAT_LOAD_DWORDX3::~Inst_FLAT__FLAT_LOAD_DWORDX3() - { - } // ~Inst_FLAT__FLAT_LOAD_DWORDX3 - - // --- description from .arch file --- - // Untyped buffer load 3 dwords. 
- void - Inst_FLAT__FLAT_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<3>(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 2]; - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - } // completeAcc - // --- Inst_FLAT__FLAT_LOAD_DWORDX4 class methods --- - - Inst_FLAT__FLAT_LOAD_DWORDX4::Inst_FLAT__FLAT_LOAD_DWORDX4( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_load_dwordx4") - { - setFlag(MemoryRef); - setFlag(Load); - } // Inst_FLAT__FLAT_LOAD_DWORDX4 - - Inst_FLAT__FLAT_LOAD_DWORDX4::~Inst_FLAT__FLAT_LOAD_DWORDX4() - { - } // ~Inst_FLAT__FLAT_LOAD_DWORDX4 - - // --- description from .arch file --- - // Untyped buffer load 4 dwords. 
- void - Inst_FLAT__FLAT_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemRead<4>(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - VecOperandU32 vdst0(gpuDynInst, extData.VDST); - VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1); - VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2); - VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst0[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4]; - vdst1[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1]; - vdst2[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2]; - vdst3[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 3]; - } - } - - vdst0.write(); - vdst1.write(); - vdst2.write(); - vdst3.write(); - } // completeAcc - // --- Inst_FLAT__FLAT_STORE_BYTE class methods --- - - Inst_FLAT__FLAT_STORE_BYTE::Inst_FLAT__FLAT_STORE_BYTE(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_byte") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_BYTE - - Inst_FLAT__FLAT_STORE_BYTE::~Inst_FLAT__FLAT_STORE_BYTE() - { - } // ~Inst_FLAT__FLAT_STORE_BYTE - - // --- description from .arch file --- - // Untyped buffer store byte. 
- void - Inst_FLAT__FLAT_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU8 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // execute - // --- Inst_FLAT__FLAT_STORE_SHORT class methods --- - - Inst_FLAT__FLAT_STORE_SHORT::Inst_FLAT__FLAT_STORE_SHORT(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_short") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_SHORT - - Inst_FLAT__FLAT_STORE_SHORT::~Inst_FLAT__FLAT_STORE_SHORT() - { - } // ~Inst_FLAT__FLAT_STORE_SHORT - - // --- description from .arch file --- - // Untyped buffer store short. 
- void - Inst_FLAT__FLAT_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU16 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_FLAT__FLAT_STORE_DWORD class methods --- - - Inst_FLAT__FLAT_STORE_DWORD::Inst_FLAT__FLAT_STORE_DWORD(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_dword") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_DWORD - - Inst_FLAT__FLAT_STORE_DWORD::~Inst_FLAT__FLAT_STORE_DWORD() - { - } // ~Inst_FLAT__FLAT_STORE_DWORD - - // --- description from .arch file --- - // Untyped buffer store dword. 
- void - Inst_FLAT__FLAT_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_FLAT__FLAT_STORE_DWORDX2 class methods --- - - Inst_FLAT__FLAT_STORE_DWORDX2::Inst_FLAT__FLAT_STORE_DWORDX2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_dwordx2") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_DWORDX2 - - Inst_FLAT__FLAT_STORE_DWORDX2::~Inst_FLAT__FLAT_STORE_DWORDX2() - { - } // ~Inst_FLAT__FLAT_STORE_DWORDX2 - - // --- description from .arch file --- - // Untyped buffer store 2 dwords. 
- void - Inst_FLAT__FLAT_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->d_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_FLAT__FLAT_STORE_DWORDX3 class methods --- - - Inst_FLAT__FLAT_STORE_DWORDX3::Inst_FLAT__FLAT_STORE_DWORDX3( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_dwordx3") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_DWORDX3 - - Inst_FLAT__FLAT_STORE_DWORDX3::~Inst_FLAT__FLAT_STORE_DWORDX3() - { - } // ~Inst_FLAT__FLAT_STORE_DWORDX3 - - // --- description from .arch file --- - // Untyped buffer store 3 dwords. 
- void - Inst_FLAT__FLAT_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data0(gpuDynInst, extData.DATA); - ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2); - - data0.read(); - data1.read(); - data2.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 3] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 1] = data1[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 3 + 2] = data2[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<3>(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_FLAT__FLAT_STORE_DWORDX4 class methods --- - - Inst_FLAT__FLAT_STORE_DWORDX4::Inst_FLAT__FLAT_STORE_DWORDX4( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_store_dwordx4") - { - setFlag(MemoryRef); - setFlag(Store); - } // Inst_FLAT__FLAT_STORE_DWORDX4 - - Inst_FLAT__FLAT_STORE_DWORDX4::~Inst_FLAT__FLAT_STORE_DWORDX4() - { - } // ~Inst_FLAT__FLAT_STORE_DWORDX4 - - // --- description from .arch file --- - // Untyped buffer store 4 dwords. 
- void - Inst_FLAT__FLAT_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - wf->decExpInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data0(gpuDynInst, extData.DATA); - ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1); - ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2); - ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3); - - data0.read(); - data1.read(); - data2.read(); - data3.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4] = data0[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 1] = data1[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 2] = data2[lane]; - (reinterpret_cast( - gpuDynInst->d_data))[lane * 4 + 3] = data3[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initMemWrite<4>(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) - { - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_SWAP class methods --- - - Inst_FLAT__FLAT_ATOMIC_SWAP::Inst_FLAT__FLAT_ATOMIC_SWAP(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_swap") - { - setFlag(AtomicExch); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SWAP - - Inst_FLAT__FLAT_ATOMIC_SWAP::~Inst_FLAT__FLAT_ATOMIC_SWAP() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SWAP - - // --- description from .arch file --- - // 
32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA; - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_SWAP::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_SWAP::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - - // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods --- - - Inst_FLAT__FLAT_ATOMIC_CMPSWAP - ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_cmpswap") - { - setFlag(AtomicCAS); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP - - Inst_FLAT__FLAT_ATOMIC_CMPSWAP::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP() - { - } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // src = DATA[0]; - // cmp = DATA[1]; - // MEM[ADDR] = (tmp == cmp) ? 
src : tmp; - // RETURN_DATA[0] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1); - - data.read(); - cmp.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->x_data))[lane] - = data[lane]; - (reinterpret_cast(gpuDynInst->a_data))[lane] - = cmp[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_ADD class methods --- - - Inst_FLAT__FLAT_ATOMIC_ADD::Inst_FLAT__FLAT_ATOMIC_ADD(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_add") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_ADD - - Inst_FLAT__FLAT_ATOMIC_ADD::~Inst_FLAT__FLAT_ATOMIC_ADD() - { - } // ~Inst_FLAT__FLAT_ATOMIC_ADD - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] += 
DATA; - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_ADD::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_ADD::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_SUB class methods --- - - Inst_FLAT__FLAT_ATOMIC_SUB::Inst_FLAT__FLAT_ATOMIC_SUB(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_sub") - { - setFlag(AtomicSub); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SUB - - Inst_FLAT__FLAT_ATOMIC_SUB::~Inst_FLAT__FLAT_ATOMIC_SUB() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SUB - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA; - // RETURN_DATA = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_SMIN class methods --- - - Inst_FLAT__FLAT_ATOMIC_SMIN::Inst_FLAT__FLAT_ATOMIC_SMIN(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_smin") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SMIN - - Inst_FLAT__FLAT_ATOMIC_SMIN::~Inst_FLAT__FLAT_ATOMIC_SMIN() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SMIN - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_SMIN::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_SMIN::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - // --- 
Inst_FLAT__FLAT_ATOMIC_UMIN class methods --- - - Inst_FLAT__FLAT_ATOMIC_UMIN::Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_umin") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_UMIN - - Inst_FLAT__FLAT_ATOMIC_UMIN::~Inst_FLAT__FLAT_ATOMIC_UMIN() - { - } // ~Inst_FLAT__FLAT_ATOMIC_UMIN - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_SMAX class methods --- - - Inst_FLAT__FLAT_ATOMIC_SMAX::Inst_FLAT__FLAT_ATOMIC_SMAX(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_smax") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SMAX - - Inst_FLAT__FLAT_ATOMIC_SMAX::~Inst_FLAT__FLAT_ATOMIC_SMAX() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SMAX - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare); - // RETURN_DATA = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_SMAX::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_SMAX::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_UMAX class methods --- - - Inst_FLAT__FLAT_ATOMIC_UMAX::Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_umax") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_UMAX - - Inst_FLAT__FLAT_ATOMIC_UMAX::~Inst_FLAT__FLAT_ATOMIC_UMAX() - { - } // ~Inst_FLAT__FLAT_ATOMIC_UMAX - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare); - // RETURN_DATA = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_AND class methods --- - - Inst_FLAT__FLAT_ATOMIC_AND::Inst_FLAT__FLAT_ATOMIC_AND(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_and") - { - setFlag(AtomicAnd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_AND - - Inst_FLAT__FLAT_ATOMIC_AND::~Inst_FLAT__FLAT_ATOMIC_AND() - { - } // ~Inst_FLAT__FLAT_ATOMIC_AND - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA; - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_OR class methods --- - - Inst_FLAT__FLAT_ATOMIC_OR::Inst_FLAT__FLAT_ATOMIC_OR(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_or") - { - setFlag(AtomicOr); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_OR - - Inst_FLAT__FLAT_ATOMIC_OR::~Inst_FLAT__FLAT_ATOMIC_OR() - { - } // ~Inst_FLAT__FLAT_ATOMIC_OR - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA; - // RETURN_DATA = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU32 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_OR::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_OR::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU32 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - - // --- Inst_FLAT__FLAT_ATOMIC_XOR class methods --- - - Inst_FLAT__FLAT_ATOMIC_XOR::Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_xor") - { - setFlag(AtomicXor); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_XOR - - Inst_FLAT__FLAT_ATOMIC_XOR::~Inst_FLAT__FLAT_ATOMIC_XOR() - { - } // ~Inst_FLAT__FLAT_ATOMIC_XOR - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA; - // RETURN_DATA = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_INC class methods --- - - Inst_FLAT__FLAT_ATOMIC_INC::Inst_FLAT__FLAT_ATOMIC_INC(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_inc") - { - setFlag(AtomicInc); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_INC - - Inst_FLAT__FLAT_ATOMIC_INC::~Inst_FLAT__FLAT_ATOMIC_INC() - { - } // ~Inst_FLAT__FLAT_ATOMIC_INC - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA = tmp. - void - Inst_FLAT__FLAT_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_DEC class methods --- - - Inst_FLAT__FLAT_ATOMIC_DEC::Inst_FLAT__FLAT_ATOMIC_DEC(InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_dec") - { - setFlag(AtomicDec); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_DEC - - Inst_FLAT__FLAT_ATOMIC_DEC::~Inst_FLAT__FLAT_ATOMIC_DEC() - { - } // ~Inst_FLAT__FLAT_ATOMIC_DEC - - // --- description from .arch file --- - // 32b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1 - // (unsigned compare); RETURN_DATA = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_SWAP_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_SWAP_X2::Inst_FLAT__FLAT_ATOMIC_SWAP_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_swap_x2") - { - setFlag(AtomicExch); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SWAP_X2 - - Inst_FLAT__FLAT_ATOMIC_SWAP_X2::~Inst_FLAT__FLAT_ATOMIC_SWAP_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SWAP_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_cmpswap_x2") - { - setFlag(AtomicCAS); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 - - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // src = DATA[0:1]; - // cmp = DATA[2:3]; - // MEM[ADDR] = (tmp == cmp) ? src : tmp; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 data(gpuDynInst, extData.DATA); - ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2); - - data.read(); - cmp.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->x_data))[lane] - = data[lane]; - (reinterpret_cast(gpuDynInst->a_data))[lane] - = cmp[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU64 vdst(gpuDynInst, extData.VDST); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_ADD_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_ADD_X2::Inst_FLAT__FLAT_ATOMIC_ADD_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_add_x2") - { - setFlag(AtomicAdd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_ADD_X2 - - Inst_FLAT__FLAT_ATOMIC_ADD_X2::~Inst_FLAT__FLAT_ATOMIC_ADD_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_ADD_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] += 
DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *wf = gpuDynInst->wavefront(); - - if (gpuDynInst->exec_mask.none()) { - wf->decVMemInstsIssued(); - if (isFlat()) { - wf->decLGKMInstsIssued(); - } - return; - } - - gpuDynInst->execUnitId = wf->execUnitId; - gpuDynInst->latency.init(gpuDynInst->computeUnit()); - gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); - - ConstVecOperandU64 data(gpuDynInst, extData.DATA); - - data.read(); - - calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - (reinterpret_cast(gpuDynInst->a_data))[lane] - = data[lane]; - } - } - - issueRequestHelper(gpuDynInst); - } // execute - - void - Inst_FLAT__FLAT_ATOMIC_ADD_X2::initiateAcc(GPUDynInstPtr gpuDynInst) - { - initAtomicAccess(gpuDynInst); - } // initiateAcc - - void - Inst_FLAT__FLAT_ATOMIC_ADD_X2::completeAcc(GPUDynInstPtr gpuDynInst) - { - if (isAtomicRet()) { - VecOperandU64 vdst(gpuDynInst, extData.VDST); - - - for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { - if (gpuDynInst->exec_mask[lane]) { - vdst[lane] = (reinterpret_cast( - gpuDynInst->d_data))[lane]; - } - } - - vdst.write(); - } - } // completeAcc - // --- Inst_FLAT__FLAT_ATOMIC_SUB_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_SUB_X2::Inst_FLAT__FLAT_ATOMIC_SUB_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_sub_x2") - { - setFlag(AtomicSub); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SUB_X2 - - Inst_FLAT__FLAT_ATOMIC_SUB_X2::~Inst_FLAT__FLAT_ATOMIC_SUB_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SUB_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_SMIN_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_SMIN_X2::Inst_FLAT__FLAT_ATOMIC_SMIN_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_smin_x2") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SMIN_X2 - - Inst_FLAT__FLAT_ATOMIC_SMIN_X2::~Inst_FLAT__FLAT_ATOMIC_SMIN_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SMIN_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_UMIN_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_UMIN_X2::Inst_FLAT__FLAT_ATOMIC_UMIN_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_umin_x2") - { - setFlag(AtomicMin); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_UMIN_X2 - - Inst_FLAT__FLAT_ATOMIC_UMIN_X2::~Inst_FLAT__FLAT_ATOMIC_UMIN_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_UMIN_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_SMAX_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_SMAX_X2::Inst_FLAT__FLAT_ATOMIC_SMAX_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_smax_x2") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_SMAX_X2 - - Inst_FLAT__FLAT_ATOMIC_SMAX_X2::~Inst_FLAT__FLAT_ATOMIC_SMAX_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_SMAX_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_UMAX_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_UMAX_X2::Inst_FLAT__FLAT_ATOMIC_UMAX_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_umax_x2") - { - setFlag(AtomicMax); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_UMAX_X2 - - Inst_FLAT__FLAT_ATOMIC_UMAX_X2::~Inst_FLAT__FLAT_ATOMIC_UMAX_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_UMAX_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_AND_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_AND_X2::Inst_FLAT__FLAT_ATOMIC_AND_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_and_x2") - { - setFlag(AtomicAnd); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_AND_X2 - - Inst_FLAT__FLAT_ATOMIC_AND_X2::~Inst_FLAT__FLAT_ATOMIC_AND_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_AND_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] &= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_OR_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_OR_X2::Inst_FLAT__FLAT_ATOMIC_OR_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_or_x2") - { - setFlag(AtomicOr); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_OR_X2 - - Inst_FLAT__FLAT_ATOMIC_OR_X2::~Inst_FLAT__FLAT_ATOMIC_OR_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_OR_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] |= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_XOR_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_XOR_X2::Inst_FLAT__FLAT_ATOMIC_XOR_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_xor_x2") - { - setFlag(AtomicXor); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_XOR_X2 - - Inst_FLAT__FLAT_ATOMIC_XOR_X2::~Inst_FLAT__FLAT_ATOMIC_XOR_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_XOR_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] ^= DATA[0:1]; - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_INC_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_INC_X2::Inst_FLAT__FLAT_ATOMIC_INC_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_inc_x2") - { - setFlag(AtomicInc); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_INC_X2 - - Inst_FLAT__FLAT_ATOMIC_INC_X2::~Inst_FLAT__FLAT_ATOMIC_INC_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_INC_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); - // RETURN_DATA[0:1] = tmp. 
- void - Inst_FLAT__FLAT_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute - // --- Inst_FLAT__FLAT_ATOMIC_DEC_X2 class methods --- - - Inst_FLAT__FLAT_ATOMIC_DEC_X2::Inst_FLAT__FLAT_ATOMIC_DEC_X2( - InFmt_FLAT *iFmt) - : Inst_FLAT(iFmt, "flat_atomic_dec_x2") - { - setFlag(AtomicDec); - if (instData.GLC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - setFlag(MemoryRef); - } // Inst_FLAT__FLAT_ATOMIC_DEC_X2 - - Inst_FLAT__FLAT_ATOMIC_DEC_X2::~Inst_FLAT__FLAT_ATOMIC_DEC_X2() - { - } // ~Inst_FLAT__FLAT_ATOMIC_DEC_X2 - - // --- description from .arch file --- - // 64b: - // tmp = MEM[ADDR]; - // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 - // (unsigned compare); - // RETURN_DATA[0:1] = tmp. - void - Inst_FLAT__FLAT_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst) - { - panicUnimplemented(); - } // execute -} // namespace VegaISA -} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index ca349c365f..a979c1e492 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -32,6 +32,10 @@ #ifndef __ARCH_VEGA_INSTS_INSTRUCTIONS_HH__ #define __ARCH_VEGA_INSTS_INSTRUCTIONS_HH__ +#include +#include + +#include "arch/amdgpu/common/dtype/mxfp_types.hh" #include "arch/amdgpu/vega/gpu_decoder.hh" #include "arch/amdgpu/vega/insts/gpu_static_inst.hh" #include "arch/amdgpu/vega/insts/op_encodings.hh" @@ -8098,6 +8102,74 @@ namespace VegaISA void execute(GPUDynInstPtr) override; }; // Inst_VOP2__V_SUBREV_U32 + class Inst_VOP2__V_FMAC_F32 : public Inst_VOP2 + { + public: + Inst_VOP2__V_FMAC_F32(InFmt_VOP2*); + ~Inst_VOP2__V_FMAC_F32(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 2; } + + int + getOperandSize(int opIdx) 
override + { + switch (opIdx) { + case 0: //src_0 + return 4; + case 1: //src_1 + return 4; + case 2: //vdst + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // Inst_VOP2__V_FMAC_F32 + + class Inst_VOP2__V_XNOR_B32 : public Inst_VOP2 + { + public: + Inst_VOP2__V_XNOR_B32(InFmt_VOP2*); + ~Inst_VOP2__V_XNOR_B32(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 2; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //src_0 + return 4; + case 1: //src_1 + return 4; + case 2: //vdst + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // Inst_VOP2__V_XNOR_B32 + class Inst_VOP1__V_NOP : public Inst_VOP1 { public: @@ -9818,6 +9890,38 @@ namespace VegaISA void execute(GPUDynInstPtr) override; }; // Inst_VOP1__V_CLREXCP + class Inst_VOP1__V_MOV_B64 : public Inst_VOP1 + { + public: + Inst_VOP1__V_MOV_B64(InFmt_VOP1*); + ~Inst_VOP1__V_MOV_B64(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 1; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //src + return 8; + case 1: //vdst + return 8; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // Inst_VOP1__V_MOV_B64 + class Inst_VOP1__V_CVT_F16_U16 : public Inst_VOP1 { public: @@ -10458,6 +10562,38 @@ namespace VegaISA void execute(GPUDynInstPtr) override; }; // Inst_VOP1__V_LOG_LEGACY_F32 + class Inst_VOP1__V_ACCVGPR_MOV_B32 : public Inst_VOP1 + { + public: + 
Inst_VOP1__V_ACCVGPR_MOV_B32(InFmt_VOP1*); + ~Inst_VOP1__V_ACCVGPR_MOV_B32(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 1; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //src + return 4; + case 1: //vdst + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // Inst_VOP1__V_ACCVGPR_MOV_B32 + class Inst_VOPC__V_CMP_CLASS_F32 : public Inst_VOPC { public: @@ -25814,6 +25950,40 @@ namespace VegaISA void execute(GPUDynInstPtr) override; }; // Inst_VOP3__V_SUBREV_U32 + class Inst_VOP3__V_FMAC_F32 : public Inst_VOP3A + { + public: + Inst_VOP3__V_FMAC_F32(InFmt_VOP3A*); + ~Inst_VOP3__V_FMAC_F32(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 2; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //src_0 + return 4; + case 1: //src_1 + return 4; + case 2: //vdst + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // Inst_VOP3__V_FMAC_F32 + class Inst_VOP3__V_NOP : public Inst_VOP3A { public: @@ -30124,6 +30294,42 @@ namespace VegaISA void execute(GPUDynInstPtr) override; }; // Inst_VOP3__V_DIV_FIXUP_F16 + class Inst_VOP3__V_LSHL_ADD_U64 : public Inst_VOP3A + { + public: + Inst_VOP3__V_LSHL_ADD_U64(InFmt_VOP3A*); + ~Inst_VOP3__V_LSHL_ADD_U64(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 3; } + + int + getOperandSize(int opIdx) 
override + { + switch (opIdx) { + case 0: //src_0 + return 8; + case 1: //src_1 + return 4; + case 2: //src_2 + return 8; + case 3: //vdst + return 8; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // Inst_VOP3__V_LSHL_ADD_U64 + class Inst_VOP3__V_CVT_PKACCUM_U8_F32 : public Inst_VOP3A { public: @@ -32986,6 +33192,74 @@ namespace VegaISA void completeAcc(GPUDynInstPtr) override; }; // Inst_DS__DS_READ_U16 + class Inst_DS__DS_READ_U16_D16 : public Inst_DS + { + public: + Inst_DS__DS_READ_U16_D16(InFmt_DS*); + ~Inst_DS__DS_READ_U16_D16(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 1; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_a + return 4; + case 1: //vgpr_rtn + return 2; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_DS__DS_READ_U16_D16 + + class Inst_DS__DS_READ_U16_D16_HI : public Inst_DS + { + public: + Inst_DS__DS_READ_U16_D16_HI(InFmt_DS*); + ~Inst_DS__DS_READ_U16_D16_HI(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 1; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_a + return 4; + case 1: //vgpr_rtn + return 2; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_DS__DS_READ_U16_D16_HI + 
class Inst_DS__DS_SWIZZLE_B32 : public Inst_DS { public: @@ -36670,6 +36944,82 @@ namespace VegaISA void completeAcc(GPUDynInstPtr) override; }; // Inst_MUBUF__BUFFER_LOAD_SSHORT + class Inst_MUBUF__BUFFER_LOAD_SHORT_D16 : public Inst_MUBUF + { + public: + Inst_MUBUF__BUFFER_LOAD_SHORT_D16(InFmt_MUBUF*); + ~Inst_MUBUF__BUFFER_LOAD_SHORT_D16(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 3; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_a + return 8; + case 1: //sgpr_r + return 16; + case 2: //sgpr_o + return 4; + case 3: //vgpr_d + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_MUBUF__BUFFER_LOAD_SHORT_D16 + + class Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI : public Inst_MUBUF + { + public: + Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI(InFmt_MUBUF*); + ~Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 3; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_a + return 8; + case 1: //sgpr_r + return 16; + case 2: //sgpr_o + return 4; + case 3: //vgpr_d + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI + class Inst_MUBUF__BUFFER_LOAD_DWORD : public Inst_MUBUF { public: @@ -42280,6 +42630,43 @@ namespace VegaISA void 
completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_STORE_SHORT + class Inst_FLAT__FLAT_STORE_SHORT_D16_HI : public Inst_FLAT + { + public: + Inst_FLAT__FLAT_STORE_SHORT_D16_HI(InFmt_FLAT*); + ~Inst_FLAT__FLAT_STORE_SHORT_D16_HI(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 0; } + int numSrcRegOperands() override { return isFlat() ? 2 : 3; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_addr + return vgprIsOffset() ? 4 : 8; + case 1: //vgpr_src + return 2; + case 2: //saddr + assert(!isFlat()); + return 8; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_FLAT__FLAT_STORE_SHORT_D16_HI + class Inst_FLAT__FLAT_STORE_DWORD : public Inst_FLAT { public: @@ -42580,6 +42967,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_SUB class Inst_FLAT__FLAT_ATOMIC_SMIN : public Inst_FLAT @@ -42656,6 +43045,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_UMIN class Inst_FLAT__FLAT_ATOMIC_SMAX : public Inst_FLAT @@ -42732,6 +43123,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_UMAX class Inst_FLAT__FLAT_ATOMIC_AND : public Inst_FLAT @@ -42769,6 +43162,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void 
completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_AND class Inst_FLAT__FLAT_ATOMIC_OR : public Inst_FLAT @@ -42845,6 +43240,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_XOR class Inst_FLAT__FLAT_ATOMIC_INC : public Inst_FLAT @@ -42882,6 +43279,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_INC class Inst_FLAT__FLAT_ATOMIC_DEC : public Inst_FLAT @@ -42919,6 +43318,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_DEC class Inst_FLAT__FLAT_ATOMIC_SWAP_X2 : public Inst_FLAT @@ -42956,6 +43357,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_SWAP_X2 class Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2 : public Inst_FLAT @@ -43071,6 +43474,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_SUB_X2 class Inst_FLAT__FLAT_ATOMIC_SMIN_X2 : public Inst_FLAT @@ -43108,6 +43513,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_SMIN_X2 class Inst_FLAT__FLAT_ATOMIC_UMIN_X2 : public Inst_FLAT @@ -43145,6 +43552,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // 
Inst_FLAT__FLAT_ATOMIC_UMIN_X2 class Inst_FLAT__FLAT_ATOMIC_SMAX_X2 : public Inst_FLAT @@ -43182,6 +43591,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_SMAX_X2 class Inst_FLAT__FLAT_ATOMIC_UMAX_X2 : public Inst_FLAT @@ -43219,6 +43630,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_UMAX_X2 class Inst_FLAT__FLAT_ATOMIC_AND_X2 : public Inst_FLAT @@ -43256,6 +43669,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_AND_X2 class Inst_FLAT__FLAT_ATOMIC_OR_X2 : public Inst_FLAT @@ -43293,6 +43708,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_OR_X2 class Inst_FLAT__FLAT_ATOMIC_XOR_X2 : public Inst_FLAT @@ -43330,6 +43747,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_XOR_X2 class Inst_FLAT__FLAT_ATOMIC_INC_X2 : public Inst_FLAT @@ -43367,6 +43786,8 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_INC_X2 class Inst_FLAT__FLAT_ATOMIC_DEC_X2 : public Inst_FLAT @@ -43404,7 +43825,1068 @@ namespace VegaISA } // getOperandSize void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; }; // Inst_FLAT__FLAT_ATOMIC_DEC_X2 + + class 
Inst_FLAT__FLAT_ATOMIC_ADD_F32 : public Inst_FLAT + { + public: + Inst_FLAT__FLAT_ATOMIC_ADD_F32(InFmt_FLAT*); + ~Inst_FLAT__FLAT_ATOMIC_ADD_F32(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return isFlat() ? 2 : 3; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_addr + return vgprIsOffset() ? 4 : 8; + case 1: //vgpr_src + return 4; + case 2: //vgpr_dst or saddr + return isFlat() ? 4 : 8; + case 3: //vgpr_dst + assert(!isFlat()); + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_FLAT__FLAT_ATOMIC_ADD_F32 + + class Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 : public Inst_FLAT + { + public: + Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16(InFmt_FLAT*); + ~Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return isFlat() ? 2 : 3; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_addr + return vgprIsOffset() ? 4 : 8; + case 1: //vgpr_src + return 4; + case 2: //vgpr_dst or saddr + return isFlat() ? 
4 : 8; + case 3: //vgpr_dst + assert(!isFlat()); + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_FLAT__FLAT_ATOMIC_PK_ADD_F16 + + class Inst_FLAT__FLAT_ATOMIC_ADD_F64 : public Inst_FLAT + { + public: + Inst_FLAT__FLAT_ATOMIC_ADD_F64(InFmt_FLAT*); + ~Inst_FLAT__FLAT_ATOMIC_ADD_F64(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return isFlat() ? 2 : 3; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_addr + return vgprIsOffset() ? 4 : 8; + case 1: //vgpr_src + return 8; + case 2: //vgpr_dst or saddr + return isFlat() ? 8 : 8; + case 3: //vgpr_dst + assert(!isFlat()); + return 8; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_FLAT__FLAT_ATOMIC_ADD_F64 + + class Inst_FLAT__FLAT_ATOMIC_MIN_F64 : public Inst_FLAT + { + public: + Inst_FLAT__FLAT_ATOMIC_MIN_F64(InFmt_FLAT*); + ~Inst_FLAT__FLAT_ATOMIC_MIN_F64(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return isFlat() ? 2 : 3; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_addr + return vgprIsOffset() ? 4 : 8; + case 1: //vgpr_src + return 8; + case 2: //vgpr_dst or saddr + return isFlat() ? 
8 : 8; + case 3: //vgpr_dst + assert(!isFlat()); + return 8; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_FLAT__FLAT_ATOMIC_MIN_F64 + + class Inst_FLAT__FLAT_ATOMIC_MAX_F64 : public Inst_FLAT + { + public: + Inst_FLAT__FLAT_ATOMIC_MAX_F64(InFmt_FLAT*); + ~Inst_FLAT__FLAT_ATOMIC_MAX_F64(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return isFlat() ? 2 : 3; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //vgpr_addr + return vgprIsOffset() ? 4 : 8; + case 1: //vgpr_src + return 8; + case 2: //vgpr_dst or saddr + return isFlat() ? 8 : 8; + case 3: //vgpr_dst + assert(!isFlat()); + return 8; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + void initiateAcc(GPUDynInstPtr) override; + void completeAcc(GPUDynInstPtr) override; + }; // Inst_FLAT__FLAT_ATOMIC_MAX_F64 + + class Inst_VOP3P__V_PK_FMA_F32 : public Inst_VOP3P + { + public: + Inst_VOP3P__V_PK_FMA_F32(InFmt_VOP3P*); + ~Inst_VOP3P__V_PK_FMA_F32(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 3; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: // src0 + return 8; + case 1: // src1 + return 8; + case 2: // src2 + return 8; + case 3: // dst + return 8; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // Inst_VOP3P__V_PK_FMA_F32 + + class Inst_VOP3P__V_PK_MUL_F32 : public 
Inst_VOP3P + { + public: + Inst_VOP3P__V_PK_MUL_F32(InFmt_VOP3P*); + ~Inst_VOP3P__V_PK_MUL_F32(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 2; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: // src0 + return 8; + case 1: // src1 + return 8; + case 2: // dst + return 8; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // Inst_VOP3P__V_PK_MUL_F32 + + class Inst_VOP3P__V_PK_ADD_F32 : public Inst_VOP3P + { + public: + Inst_VOP3P__V_PK_ADD_F32(InFmt_VOP3P*); + ~Inst_VOP3P__V_PK_ADD_F32(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 2; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: // src0 + return 8; + case 1: // src1 + return 8; + case 2: // dst + return 8; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // Inst_VOP3P__V_PK_ADD_F32 + + class Inst_VOP3P__V_PK_MOV_B32 : public Inst_VOP3P + { + public: + Inst_VOP3P__V_PK_MOV_B32(InFmt_VOP3P*); + ~Inst_VOP3P__V_PK_MOV_B32(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 2; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: // src0 + return 8; + case 1: // src1 + return 8; + case 2: // dst + return 8; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // 
Inst_VOP3P__V_PK_MOV_B32 + + template + class Inst_VOP3P_MAI__V_MFMA : public Inst_VOP3P_MAI + { + + private: + static constexpr int gprs_a = M * K * B / 64, gprs_b = K * N * B / 64, + gprs_c_d = M * N * B / 64; + + public: + Inst_VOP3P_MAI__V_MFMA(InFmt_VOP3P_MAI *iFmt) + : Inst_VOP3P_MAI(iFmt, *MNEMONIC) + { + setFlag(ALU); + setFlag(MFMA); + if (_delta == 2) { + setFlag(F64); + } else if (_delta == 1) { + setFlag(F32); + } + } + ~Inst_VOP3P_MAI__V_MFMA() {} + + int getNumOperands() override { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 3; } + + int getOperandSize(int opIdx) override { + switch (opIdx) { + case 0: // src0 "A" + return 4*gprs_a; + case 1: // src1 "B" + return 4*gprs_b; + case 2: // src2 "C" + return 4*gprs_c_d; + case 3: // dst + return 4*gprs_c_d; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void + execute(GPUDynInstPtr gpuDynInst) override + { + int acc_cd_off = 0; + int acc_a_off = 0; + int acc_b_off = 0; + if (instData.ACC_CD) { + acc_cd_off = gpuDynInst->wavefront()->accumOffset; + } + if (extData.ACC) { + int tmp_acc = extData.ACC; + if (tmp_acc & 0x1) { + acc_a_off = gpuDynInst->wavefront()->accumOffset; + } + if (tmp_acc & 0x2) { + acc_b_off = gpuDynInst->wavefront()->accumOffset; + } + } + + alignas(T1) std::byte _src0[sizeof(T1) * gprs_a]; + alignas(T1) std::byte _src1[sizeof(T1) * gprs_b]; + alignas(T1) std::byte _src2[sizeof(T1) * gprs_c_d]; + alignas(T2) std::byte _vdst[sizeof(T2) * gprs_c_d]; + T1 *src0 = std::launder(reinterpret_cast(&_src0)); + T1 *src1 = std::launder(reinterpret_cast(&_src1)); + T1 *src2 = std::launder(reinterpret_cast(&_src2)); + T2 *vdst = std::launder(reinterpret_cast(&_vdst)); + + // Handling of src2 is a bit tricky. The operator[] overload cannot + // be used for dword count > 2, and the dword count here is 4. 
Usually + // src2 is a VGPR/AccGPR, but it might also be constant. In order to + // use operator[] and handle constants, check for VGPR here and set + // a delta for each of the src2 GPRs. + int delta = isVectorReg(extData.SRC0) ? _delta : 0; + for (int i = 0; i < gprs_a; i++) { + new (&src0[i]) T1(gpuDynInst, extData.SRC0+acc_a_off+i*delta); + src0[i].readSrc(); + } + + delta = isVectorReg(extData.SRC1) ? _delta : 0; + for (int i = 0; i < gprs_b; i++) { + new (&src1[i]) T1(gpuDynInst, extData.SRC1+acc_b_off+i*delta); + src1[i].readSrc(); + } + + delta = isVectorReg(extData.SRC2) ? _delta : 0; + for (int i = 0; i < gprs_c_d; i++) { + new (&src2[i]) T1(gpuDynInst, extData.SRC2+acc_cd_off+i*delta); + src2[i].readSrc(); + } + + for (int i = 0; i < gprs_c_d; i++) { + new (&vdst[i]) T2(gpuDynInst, instData.VDST+acc_cd_off+i*_delta); + } + + // These values and meanings are described in the MI300 ISA manual: + // + // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/ + // instruction-set-architectures/ + // amd-instinct-mi300-cdna3-instruction-set-architecture.pdf + // + // in section 7.1.4.2. In theory, only the M, N, K, and H values change + // for each MFMA instruction. + + // Output layout + constexpr int H = _delta == 2 ? 1 : 4; + + // This replaces `constexpr int B_I = std::ceil(64.0f / (N * M / H));` + // which failed clang compiler tests as it's not a constant expression. + constexpr float B_I_f = 64.0f / (N * M / H); + constexpr int B_I = + (static_cast(static_cast(B_I_f)) == B_I_f) + ? static_cast(B_I_f) + : static_cast(B_I_f) + ((B_I_f > 0) ? 1 : 0); + + constexpr int M_I = (64 / B_I) / N; + constexpr int G = M / (H * M_I); + + float result[M][N]; + + // Input layout + constexpr int K_L = K / (64 / (M * B)); + + for (int block = 0; block < B; block++) { + // Load src2 into result. 
src2 is row major + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + int item = (i % H) + H * (i/(H*M_I) + G * (block / B_I)); + int lane = j + N * ((i / H) % M_I + M_I * (block % B_I)); + + result[i][j] = src2[item][lane]; + } + } + + // Compute new result + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + for (int k = 0; k < K; ++k) { + // src0 is column major, src1 is row major + int lane_A = i + M * (block + B * (k / K_L)); + int lane_B = j + N * (block + B * (k / K_L)); + int item = k % K_L; + result[i][j] += + src0[item][lane_A] * src1[item][lane_B]; + } + } + } + + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + int item = (i % H) + H * (i/(H*M_I) + G * (block / B_I)); + int lane = j + N * ((i / H) % M_I + M_I * (block % B_I)); + + vdst[item][lane] = result[i][j]; + } + } + } + + for (int i = 0; i < gprs_c_d; ++i) { + vdst[i].write(); + } + + for (int i = 0; i < gprs_a; i++) { + std::destroy_at(&src0[i]); + } + for (int i = 0; i < gprs_b; i++) { + std::destroy_at(&src1[i]); + } + for (int i = 0; i < gprs_c_d; i++) { + std::destroy_at(&src2[i]); + } + for (int i = 0; i < gprs_c_d; i++) { + std::destroy_at(&vdst[i]); + } + } // execute + }; + + static const char *MNEM__V_MFMA_F32_4X4X1_16B_F32 = + "v_mfma_f32_4x4x1_16b_f32"; + using Inst_VOP3P_MAI__V_MFMA_F32_4X4X1_16B_F32 = + Inst_VOP3P_MAI__V_MFMA<1, 4, 4, 1, 16, ConstVecOperandF32, + VecOperandF32, &MNEM__V_MFMA_F32_4X4X1_16B_F32>; + + static const char *MNEM__V_MFMA_F32_32X32X1_2B_F32 = + "v_mfma_f32_32x32x1_2b_f32"; + using Inst_VOP3P_MAI__V_MFMA_F32_32X32X1_2B_F32 = + Inst_VOP3P_MAI__V_MFMA<1, 32, 32, 1, 2, ConstVecOperandF32, + VecOperandF32, + &MNEM__V_MFMA_F32_32X32X1_2B_F32>; + + static const char *MNEM__V_MFMA_F32_32X32X2_F32 = + "v_mfma_f32_32x32x2_f32"; + using Inst_VOP3P_MAI__V_MFMA_F32_32X32X2_F32 = + Inst_VOP3P_MAI__V_MFMA<1, 32, 32, 2, 1, ConstVecOperandF32, + VecOperandF32, &MNEM__V_MFMA_F32_32X32X2_F32>; + + static const char 
*MNEM__V_MFMA_F32_16X16X4_F32 = + "v_mfma_f32_16x16x4_f32"; + using Inst_VOP3P_MAI__V_MFMA_F32_16X16X4_F32 = + Inst_VOP3P_MAI__V_MFMA<1, 16, 16, 4, 1, ConstVecOperandF32, + VecOperandF32, &MNEM__V_MFMA_F32_16X16X4_F32>; + + static const char *MNEM__V_MFMA_F32_16X16X1_4B_F32 = + "v_mfma_f32_16x16x1_4b_f32"; + using Inst_VOP3P_MAI__V_MFMA_F32_16X16X1_4B_F32 = + Inst_VOP3P_MAI__V_MFMA<1, 16, 16, 1, 4, ConstVecOperandF32, + VecOperandF32, + &MNEM__V_MFMA_F32_16X16X1_4B_F32>; + + static const char *MNEM__V_MFMA_F64_4X4X4_4B_F64 = + "v_mfma_f64_4x4x4_4b_f64"; + using Inst_VOP3P_MAI__V_MFMA_F64_4X4X4_4B_F64 = + Inst_VOP3P_MAI__V_MFMA<2, 4, 4, 4, 4, ConstVecOperandF64, + VecOperandF64, &MNEM__V_MFMA_F64_4X4X4_4B_F64>; + + static const char *MNEM__V_MFMA_F64_16X16X4_F64 = + "v_mfma_f64_16x16x4_f64"; + using Inst_VOP3P_MAI__V_MFMA_F64_16X16X4_F64 = + Inst_VOP3P_MAI__V_MFMA<2, 16, 16, 4, 1, ConstVecOperandF64, + VecOperandF64, &MNEM__V_MFMA_F64_16X16X4_F64>; + + + template + class Inst_VOP3P_MAI__V_MFMA_MXFP : public Inst_VOP3P_MAI + { + + private: + // Scale GPRs needed by elements / GPR (gpr_ratio) + static constexpr int gpr_ratio = 32 / MXFPT::size(); + static constexpr int gprs_a = M * K * B / (64 * gpr_ratio); + static constexpr int gprs_b = K * N * B / (64 * gpr_ratio); + + // Always F32 which has an effective gpr_ratio of 1 + static constexpr int gprs_c_d = M * N * B / 64; + + public: + Inst_VOP3P_MAI__V_MFMA_MXFP(InFmt_VOP3P_MAI *iFmt) + : Inst_VOP3P_MAI(iFmt, *MNEMONIC) + { + setFlag(ALU); + setFlag(MFMA); + if (MXFPT::size() == 16) { + setFlag(F16); + } + } + ~Inst_VOP3P_MAI__V_MFMA_MXFP() {} + + int getNumOperands() override { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 3; } + + int getOperandSize(int opIdx) override { + switch (opIdx) { + case 0: // src0 "A" + return 4*gprs_a; + case 1: // src1 "B" + return 4*gprs_b; + case 2: // src2 "C" + 
return 4*gprs_c_d; + case 3: // dst + return 4*gprs_c_d; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void + execute(GPUDynInstPtr gpuDynInst) override + { + int acc_cd_off = 0; + int acc_a_off = 0; + int acc_b_off = 0; + if (instData.ACC_CD) { + acc_cd_off = gpuDynInst->wavefront()->accumOffset; + } + if (extData.ACC) { + int tmp_acc = extData.ACC; + if (tmp_acc & 0x1) { + acc_a_off = gpuDynInst->wavefront()->accumOffset; + } + if (tmp_acc & 0x2) { + acc_b_off = gpuDynInst->wavefront()->accumOffset; + } + } + + // Read the MXFP types as U32 - Consider this "untyped." + // A ConstVecOperand needs to be used for src2 as it could be an + // inline constant. The Const version provides an operator[] overload + // to read inline constants to each lane. The non-const type of src2 + // should be used for vdst to make it writeable. + using T1 = ConstVecOperandU32; + using T2 = ConstVecOperandF32; + using T3 = VecOperandF32; + + alignas(T1) std::byte _src0[sizeof(T1) * gprs_a]; + alignas(T1) std::byte _src1[sizeof(T1) * gprs_b]; + alignas(T2) std::byte _src2[sizeof(T2) * gprs_c_d]; + alignas(T3) std::byte _vdst[sizeof(T3) * gprs_c_d]; + T1 *src0 = std::launder(reinterpret_cast(&_src0)); + T1 *src1 = std::launder(reinterpret_cast(&_src1)); + T2 *src2 = std::launder(reinterpret_cast(&_src2)); + T3 *vdst = std::launder(reinterpret_cast(&_vdst)); + + // Handling of src2 is a bit tricky. The operator[] overload cannot + // be used for dword count > 2, and the dword count here is 4. Usually + // src2 is a VGPR/AccGPR, but it might also be constant. In order to + // use operator[] and handle constants, check for VGPR here and set + // a delta for each of the src2 GPRs. + + int delta = isVectorReg(extData.SRC0) ? 1 : 0; + for (int i = 0; i < gprs_a; i++) { + new (&src0[i]) T1(gpuDynInst, extData.SRC0+acc_a_off+i*delta); + src0[i].readSrc(); + } + + delta = isVectorReg(extData.SRC1) ? 
1 : 0; + for (int i = 0; i < gprs_b; i++) { + new (&src1[i]) T1(gpuDynInst, extData.SRC1+acc_b_off+i*delta); + src1[i].readSrc(); + } + + delta = isVectorReg(extData.SRC2) ? 1 : 0; + for (int i = 0; i < gprs_c_d; i++) { + new (&src2[i]) T2(gpuDynInst, extData.SRC2+acc_cd_off+i*delta); + src2[i].readSrc(); + } + + for (int i = 0; i < gprs_c_d; i++) { + new (&vdst[i]) T3(gpuDynInst, instData.VDST+acc_cd_off+i); + } + + // These values and meanings are described in the MI300 ISA manual: + // + // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/ + // instruction-set-architectures/ + // amd-instinct-mi300-cdna3-instruction-set-architecture.pdf + // + // in section 7.1.4.2. In theory, only the M, N, K, and H values change + // for each MFMA instruction. + + // Output layout + constexpr int H = 4; + + // This replaces `constexpr int B_I = std::ceil(64.0f / (N * M / H));` + // which failed clang compiler tests as it's not a constant expression. + constexpr float B_I_f = 64.0f / (N * M / H); + constexpr int B_I = + (static_cast(static_cast(B_I_f)) == B_I_f) + ? static_cast(B_I_f) + : static_cast(B_I_f) + ((B_I_f > 0) ? 1 : 0); + constexpr int M_I = (64 / B_I) / N; + constexpr int G = M / (H * M_I); + + float result[M][N]; + + // Input layout + constexpr int K_L = K / (64 / (M * B)); + + for (int block = 0; block < B; block++) { + // Load src2 into result. 
src2 is row major + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + int item = (i % H) + H * (i/(H*M_I) + G * (block / B_I)); + int lane = j + N * ((i / H) % M_I + M_I * (block % B_I)); + + result[i][j] = src2[item][lane]; + } + } + + // Compute new result + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + for (int k = 0; k < K; ++k) { + // src0 is column major, src1 is row major + int lane_A = i + M * (block + B * (k / K_L)); + int lane_B = j + N * (block + B * (k / K_L)); + int item = k % K_L; + + PackedReg A_elems; + PackedReg B_elems; + + for (int i = 0; i < gprs_a; ++i) { + A_elems.setDword(i, src0[i][lane_A]); + } + for (int i = 0; i < gprs_b; ++i) { + B_elems.setDword(i, src1[i][lane_B]); + } + + MXFPT item_A(A_elems.getElem(item)); + MXFPT item_B(B_elems.getElem(item)); + + result[i][j] += item_A * item_B; + } + } + } + + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + int item = (i % H) + H * (i/(H*M_I) + G * (block / B_I)); + int lane = j + N * ((i / H) % M_I + M_I * (block % B_I)); + + vdst[item][lane] = result[i][j]; + } + } + } + + for (int i = 0; i < gprs_c_d; ++i) { + vdst[i].write(); + } + + for (int i = 0; i < gprs_a; i++) { + std::destroy_at(&src0[i]); + } + for (int i = 0; i < gprs_b; i++) { + std::destroy_at(&src1[i]); + } + for (int i = 0; i < gprs_c_d; i++) { + std::destroy_at(&src2[i]); + } + for (int i = 0; i < gprs_c_d; i++) { + std::destroy_at(&vdst[i]); + } + } // execute + }; + + + static const char *MNEM__V_MFMA_F32_16X16X16_F16 = + "v_mfma_f32_16x16x16_f16"; + using Inst_VOP3P_MAI__V_MFMA_F32_16X16X16_F16 = + Inst_VOP3P_MAI__V_MFMA_MXFP<16, 16, 16, 1, AMDGPU::mxfloat16, + &MNEM__V_MFMA_F32_16X16X16_F16>; + + static const char *MNEM__V_MFMA_F32_16X16X4_4B_F16 = + "v_mfma_f32_16x16x4_4b_f16"; + using Inst_VOP3P_MAI__V_MFMA_F32_16X16X4_4B_F16 = + Inst_VOP3P_MAI__V_MFMA_MXFP<16, 16, 4, 4, AMDGPU::mxfloat16, + &MNEM__V_MFMA_F32_16X16X4_4B_F16>; + + static const char 
*MNEM__V_MFMA_F32_32X32X4_2B_F16 = + "v_mfma_f32_32x32x4_2b_f16"; + using Inst_VOP3P_MAI__V_MFMA_F32_32X32X4_2B_F16 = + Inst_VOP3P_MAI__V_MFMA_MXFP<32, 32, 4, 2, AMDGPU::mxfloat16, + &MNEM__V_MFMA_F32_32X32X4_2B_F16>; + + static const char *NMEM__V_MFMA_F32_32X32X8_F16 = + "v_mfma_f32_32x32x8_f16"; + using Inst_VOP3P_MAI__V_MFMA_F32_32X32X8_F16 = + Inst_VOP3P_MAI__V_MFMA_MXFP<32, 32, 8, 1, AMDGPU::mxfloat16, + &NMEM__V_MFMA_F32_32X32X8_F16>; + + static const char *MNEM__V_MFMA_F32_4X4X4_16B_F16 = + "v_mfma_f32_4x4x4_16b_f16"; + using Inst_VOP3P_MAI__V_MFMA_F32_4X4X4_16B_F16 = + Inst_VOP3P_MAI__V_MFMA_MXFP<4, 4, 4, 16, AMDGPU::mxfloat16, + &MNEM__V_MFMA_F32_4X4X4_16B_F16>; + + static const char *MNEM__V_MFMA_F32_32X32X8_BF16 = + "v_mfma_f32_32x32x8_bf16"; + using Inst_VOP3P_MAI__V_MFMA_F32_32X32X8_BF16 = + Inst_VOP3P_MAI__V_MFMA_MXFP<32, 32, 8, 1, AMDGPU::mxbfloat16, + &MNEM__V_MFMA_F32_32X32X8_BF16>; + + + template + class Inst_VOP3P_MAI__V_MFMA_I8 : public Inst_VOP3P_MAI + { + + private: + // Only int8 exists at the moment, but make the type a parameter. 
+ using DT = int8_t; + static constexpr int DT_bits = sizeof(DT) * 8; + + // Scale GPRs needed by elements / GPR (gpr_ratio) + static constexpr int gpr_ratio = 32 / DT_bits; + static constexpr int gprs_a = M * K * B / (64 * gpr_ratio); + static constexpr int gprs_b = K * N * B / (64 * gpr_ratio); + + // Always F32 which has an effective gpr_ratio of 1 + static constexpr int gprs_c_d = M * N * B / 64; + + public: + Inst_VOP3P_MAI__V_MFMA_I8(InFmt_VOP3P_MAI *iFmt) + : Inst_VOP3P_MAI(iFmt, *MNEMONIC) + { + setFlag(ALU); + setFlag(MFMA); + setFlag(I8); + } + ~Inst_VOP3P_MAI__V_MFMA_I8() {} + + int getNumOperands() override { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 3; } + + int getOperandSize(int opIdx) override { + switch (opIdx) { + case 0: // src0 "A" + return 4*gprs_a; + case 1: // src1 "B" + return 4*gprs_b; + case 2: // src2 "C" + return 4*gprs_c_d; + case 3: // dst + return 4*gprs_c_d; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void + execute(GPUDynInstPtr gpuDynInst) override + { + int acc_cd_off = 0; + int acc_a_off = 0; + int acc_b_off = 0; + if (instData.ACC_CD) { + acc_cd_off = gpuDynInst->wavefront()->accumOffset; + } + if (extData.ACC) { + int tmp_acc = extData.ACC; + if (tmp_acc & 0x1) { + acc_a_off = gpuDynInst->wavefront()->accumOffset; + } + if (tmp_acc & 0x2) { + acc_b_off = gpuDynInst->wavefront()->accumOffset; + } + } + + // Read the packed types as U32 - Consider this "untyped." + // A ConstVecOperand needs to be used for src2 as it could be an + // inline constant. The Const version provides an operator[] overload + // to read inline constants to each lane. The non-const type of src2 + // should be used for vdst to make it writeable. 
+ using T1 = ConstVecOperandU32; + using T2 = ConstVecOperandI32; + using T3 = VecOperandI32; + + alignas(T1) std::byte _src0[sizeof(T1) * gprs_a]; + alignas(T1) std::byte _src1[sizeof(T1) * gprs_b]; + alignas(T2) std::byte _src2[sizeof(T2) * gprs_c_d]; + alignas(T3) std::byte _vdst[sizeof(T3) * gprs_c_d]; + T1 *src0 = std::launder(reinterpret_cast(&_src0)); + T1 *src1 = std::launder(reinterpret_cast(&_src1)); + T2 *src2 = std::launder(reinterpret_cast(&_src2)); + T3 *vdst = std::launder(reinterpret_cast(&_vdst)); + + // Handling of src2 is a bit tricky. The operator[] overload cannot + // be used for dword count > 2, and the dword count here is 4. Usually + // src2 is a VGPR/AccGPR, but it might also be constant. In order to + // use operator[] and handle constants, check for VGPR here and set + // a delta for each of the src2 GPRs. + + int delta = isVectorReg(extData.SRC0) ? 1 : 0; + for (int i = 0; i < gprs_a; i++) { + new (&src0[i]) T1(gpuDynInst, extData.SRC0+acc_a_off+i*delta); + src0[i].readSrc(); + } + + delta = isVectorReg(extData.SRC1) ? 1 : 0; + for (int i = 0; i < gprs_b; i++) { + new (&src1[i]) T1(gpuDynInst, extData.SRC1+acc_b_off+i*delta); + src1[i].readSrc(); + } + + delta = isVectorReg(extData.SRC2) ? 1 : 0; + for (int i = 0; i < gprs_c_d; i++) { + new (&src2[i]) T2(gpuDynInst, extData.SRC2+acc_cd_off+i*delta); + src2[i].readSrc(); + } + + for (int i = 0; i < gprs_c_d; i++) { + new (&vdst[i]) T3(gpuDynInst, instData.VDST+acc_cd_off+i); + } + + // These values and meanings are described in the MI300 ISA manual: + // + // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/ + // instruction-set-architectures/ + // amd-instinct-mi300-cdna3-instruction-set-architecture.pdf + // + // in section 7.1.4.2. In theory, only the M, N, K, and H values change + // for each MFMA instruction. 
+ + // Output layout + constexpr int H = 4; + + // This replaces `constexpr int B_I = std::ceil(64.0f / (N * M / H));` + // which failed clang compiler tests as it's not a constant expression. + constexpr float B_I_f = 64.0f / (N * M / H); + constexpr int B_I = + (static_cast(static_cast(B_I_f)) == B_I_f) + ? static_cast(B_I_f) + : static_cast(B_I_f) + ((B_I_f > 0) ? 1 : 0); + + constexpr int M_I = (64 / B_I) / N; + constexpr int G = M / (H * M_I); + + int32_t result[M][N]; + + // Input layout + constexpr int K_L = K / (64 / (M * B)); + + for (int block = 0; block < B; block++) { + // Load src2 into result. src2 is row major + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + int item = (i % H) + H * (i/(H*M_I) + G * (block / B_I)); + int lane = j + N * ((i / H) % M_I + M_I * (block % B_I)); + + result[i][j] = src2[item][lane]; + } + } + + // Compute new result + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + for (int k = 0; k < K; ++k) { + // src0 is column major, src1 is row major + int lane_A = i + M * (block + B * (k / K_L)); + int lane_B = j + N * (block + B * (k / K_L)); + int item = k % K_L; + + PackedReg A_elems; + PackedReg B_elems; + + for (int i = 0; i < gprs_a; ++i) { + A_elems.setDword(i, src0[i][lane_A]); + } + for (int i = 0; i < gprs_b; ++i) { + B_elems.setDword(i, src1[i][lane_B]); + } + + DT item_A(A_elems.getElem(item)); + DT item_B(B_elems.getElem(item)); + + result[i][j] += int32_t(item_A) * int32_t(item_B); + } + } + } + + for (int i = 0; i < M; ++i) { + for (int j = 0; j < N; ++j) { + int item = (i % H) + H * (i/(H*M_I) + G * (block / B_I)); + int lane = j + N * ((i / H) % M_I + M_I * (block % B_I)); + + vdst[item][lane] = result[i][j]; + } + } + } + + for (int i = 0; i < gprs_c_d; ++i) { + vdst[i].write(); + } + + for (int i = 0; i < gprs_a; i++) { + std::destroy_at(&src0[i]); + } + for (int i = 0; i < gprs_b; i++) { + std::destroy_at(&src1[i]); + } + for (int i = 0; i < gprs_c_d; i++) { + 
std::destroy_at(&src2[i]); + } + for (int i = 0; i < gprs_c_d; i++) { + std::destroy_at(&vdst[i]); + } + } // execute + }; + + static const char *MNEM__V_MFMA_I32_16X16X16_I8 = + "v_mfma_i32_16x16x16_i8"; + using Inst_VOP3P_MAI__V_MFMA_I32_16X16X16_I8 = + Inst_VOP3P_MAI__V_MFMA_I8<16, 16, 16, 1, + &MNEM__V_MFMA_I32_16X16X16_I8>; + + + class Inst_VOP3__V_CVT_PK_FP8_F32 : public Inst_VOP3A + { + public: + Inst_VOP3__V_CVT_PK_FP8_F32(InFmt_VOP3A*); + ~Inst_VOP3__V_CVT_PK_FP8_F32(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 2; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //src_0 + return 4; + case 1: //src_1 + return 4; + case 2: //vdst + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // Inst_VOP3__V_CVT_PK_FP8_F32 } // namespace VegaISA } // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/mimg.cc b/src/arch/amdgpu/vega/insts/mimg.cc new file mode 100644 index 0000000000..29a37cca1d --- /dev/null +++ b/src/arch/amdgpu/vega/insts/mimg.cc @@ -0,0 +1,2047 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_MIMG__IMAGE_LOAD class methods --- + + Inst_MIMG__IMAGE_LOAD::Inst_MIMG__IMAGE_LOAD(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_load") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_LOAD + + Inst_MIMG__IMAGE_LOAD::~Inst_MIMG__IMAGE_LOAD() + { + } // ~Inst_MIMG__IMAGE_LOAD + + // --- description from .arch file --- + // Image memory load with format conversion specified in T#. No sampler. 
+ void + Inst_MIMG__IMAGE_LOAD::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_LOAD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_LOAD::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_LOAD_MIP class methods --- + + Inst_MIMG__IMAGE_LOAD_MIP::Inst_MIMG__IMAGE_LOAD_MIP(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_load_mip") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_LOAD_MIP + + Inst_MIMG__IMAGE_LOAD_MIP::~Inst_MIMG__IMAGE_LOAD_MIP() + { + } // ~Inst_MIMG__IMAGE_LOAD_MIP + + // --- description from .arch file --- + // Image memory load with user-supplied mip level. No sampler. + void + Inst_MIMG__IMAGE_LOAD_MIP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_LOAD_MIP::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_LOAD_MIP::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_LOAD_PCK class methods --- + + Inst_MIMG__IMAGE_LOAD_PCK::Inst_MIMG__IMAGE_LOAD_PCK(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_load_pck") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_LOAD_PCK + + Inst_MIMG__IMAGE_LOAD_PCK::~Inst_MIMG__IMAGE_LOAD_PCK() + { + } // ~Inst_MIMG__IMAGE_LOAD_PCK + + // --- description from .arch file --- + // Image memory load with no format conversion. No sampler. 
+ void + Inst_MIMG__IMAGE_LOAD_PCK::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_LOAD_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_LOAD_PCK::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_LOAD_PCK_SGN class methods --- + + Inst_MIMG__IMAGE_LOAD_PCK_SGN::Inst_MIMG__IMAGE_LOAD_PCK_SGN( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_load_pck_sgn") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_LOAD_PCK_SGN + + Inst_MIMG__IMAGE_LOAD_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_PCK_SGN() + { + } // ~Inst_MIMG__IMAGE_LOAD_PCK_SGN + + // --- description from .arch file --- + // Image memory load with with no format conversion and sign extension. No + // --- sampler. + void + Inst_MIMG__IMAGE_LOAD_PCK_SGN::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_LOAD_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_LOAD_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_LOAD_MIP_PCK class methods --- + + Inst_MIMG__IMAGE_LOAD_MIP_PCK::Inst_MIMG__IMAGE_LOAD_MIP_PCK( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_load_mip_pck") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_LOAD_MIP_PCK + + Inst_MIMG__IMAGE_LOAD_MIP_PCK::~Inst_MIMG__IMAGE_LOAD_MIP_PCK() + { + } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK + + // --- description from .arch file --- + // Image memory load with user-supplied mip level, no format conversion. No + // --- sampler. 
+ void + Inst_MIMG__IMAGE_LOAD_MIP_PCK::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_LOAD_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_LOAD_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN class methods --- + + Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_load_mip_pck_sgn") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN + + Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN() + { + } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN + + // --- description from .arch file --- + // Image memory load with user-supplied mip level, no format conversion and + // --- with sign extension. No sampler. + void + Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_STORE class methods --- + + Inst_MIMG__IMAGE_STORE::Inst_MIMG__IMAGE_STORE(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_store") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_STORE + + Inst_MIMG__IMAGE_STORE::~Inst_MIMG__IMAGE_STORE() + { + } // ~Inst_MIMG__IMAGE_STORE + + // --- description from .arch file --- + // Image memory store with format conversion specified in T#. No sampler. 
+ void + Inst_MIMG__IMAGE_STORE::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_STORE::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_STORE::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_STORE_MIP class methods --- + + Inst_MIMG__IMAGE_STORE_MIP::Inst_MIMG__IMAGE_STORE_MIP(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_store_mip") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_STORE_MIP + + Inst_MIMG__IMAGE_STORE_MIP::~Inst_MIMG__IMAGE_STORE_MIP() + { + } // ~Inst_MIMG__IMAGE_STORE_MIP + + // --- description from .arch file --- + // Image memory store with format conversion specified in T# to user + // specified mip level. No sampler. + void + Inst_MIMG__IMAGE_STORE_MIP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_STORE_MIP::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_STORE_MIP::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_STORE_PCK class methods --- + + Inst_MIMG__IMAGE_STORE_PCK::Inst_MIMG__IMAGE_STORE_PCK(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_store_pck") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_STORE_PCK + + Inst_MIMG__IMAGE_STORE_PCK::~Inst_MIMG__IMAGE_STORE_PCK() + { + } // ~Inst_MIMG__IMAGE_STORE_PCK + + // --- description from .arch file --- + // Image memory store of packed data without format conversion. No sampler. 
+ void + Inst_MIMG__IMAGE_STORE_PCK::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_STORE_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_STORE_PCK::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_STORE_MIP_PCK class methods --- + + Inst_MIMG__IMAGE_STORE_MIP_PCK::Inst_MIMG__IMAGE_STORE_MIP_PCK( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_store_mip_pck") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_STORE_MIP_PCK + + Inst_MIMG__IMAGE_STORE_MIP_PCK::~Inst_MIMG__IMAGE_STORE_MIP_PCK() + { + } // ~Inst_MIMG__IMAGE_STORE_MIP_PCK + + // --- description from .arch file --- + // Image memory store of packed data without format conversion to + // user-supplied mip level. No sampler. + void + Inst_MIMG__IMAGE_STORE_MIP_PCK::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MIMG__IMAGE_STORE_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MIMG__IMAGE_STORE_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MIMG__IMAGE_GET_RESINFO class methods --- + + Inst_MIMG__IMAGE_GET_RESINFO::Inst_MIMG__IMAGE_GET_RESINFO( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_get_resinfo") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GET_RESINFO + + Inst_MIMG__IMAGE_GET_RESINFO::~Inst_MIMG__IMAGE_GET_RESINFO() + { + } // ~Inst_MIMG__IMAGE_GET_RESINFO + + // --- description from .arch file --- + // return resource info for a given mip level specified in the address + // vgpr. No sampler. Returns 4 integer values into VGPRs 3-0: + // {num_mip_levels, depth, height, width}. 
+ void + Inst_MIMG__IMAGE_GET_RESINFO::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_SWAP class methods --- + + Inst_MIMG__IMAGE_ATOMIC_SWAP::Inst_MIMG__IMAGE_ATOMIC_SWAP( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_swap") + { + setFlag(AtomicExch); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_SWAP + + Inst_MIMG__IMAGE_ATOMIC_SWAP::~Inst_MIMG__IMAGE_ATOMIC_SWAP() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_SWAP + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = DATA; + // RETURN_DATA = tmp. + void + Inst_MIMG__IMAGE_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_CMPSWAP class methods --- + + Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::Inst_MIMG__IMAGE_ATOMIC_CMPSWAP( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_cmpswap") + { + setFlag(AtomicCAS); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_CMPSWAP + + Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // src = DATA[0]; + // cmp = DATA[1]; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. 
+ void + Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_ADD class methods --- + + Inst_MIMG__IMAGE_ATOMIC_ADD::Inst_MIMG__IMAGE_ATOMIC_ADD(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_add") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_ADD + + Inst_MIMG__IMAGE_ATOMIC_ADD::~Inst_MIMG__IMAGE_ATOMIC_ADD() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_ADD + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] += DATA; + // RETURN_DATA = tmp. + void + Inst_MIMG__IMAGE_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_ATOMIC_SUB class methods --- + + Inst_MIMG__IMAGE_ATOMIC_SUB::Inst_MIMG__IMAGE_ATOMIC_SUB(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_atomic_sub") + { + setFlag(AtomicSub); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_ATOMIC_SUB + + Inst_MIMG__IMAGE_ATOMIC_SUB::~Inst_MIMG__IMAGE_ATOMIC_SUB() + { + } // ~Inst_MIMG__IMAGE_ATOMIC_SUB + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= DATA; + // RETURN_DATA = tmp. 
+ void
+ Inst_MIMG__IMAGE_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // NOTE(review): the signed (SMIN/SMAX) and unsigned (UMIN/UMAX) image
+ // atomics below map onto the same AtomicMin/AtomicMax flags, so the flag
+ // set does not encode signedness. Harmless while execute() just panics,
+ // but confirm signedness is carried elsewhere before implementing these.
+ // --- Inst_MIMG__IMAGE_ATOMIC_SMIN class methods ---
+
+ Inst_MIMG__IMAGE_ATOMIC_SMIN::Inst_MIMG__IMAGE_ATOMIC_SMIN(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_atomic_smin")
+ {
+ setFlag(AtomicMin);
+ if (instData.GLC) {
+ setFlag(AtomicReturn);
+ } else {
+ setFlag(AtomicNoReturn);
+ }
+ setFlag(MemoryRef);
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_ATOMIC_SMIN
+
+ Inst_MIMG__IMAGE_ATOMIC_SMIN::~Inst_MIMG__IMAGE_ATOMIC_SMIN()
+ {
+ } // ~Inst_MIMG__IMAGE_ATOMIC_SMIN
+
+ // --- description from .arch file ---
+ // 32b:
+ // tmp = MEM[ADDR];
+ // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
+ // RETURN_DATA = tmp.
+ void
+ Inst_MIMG__IMAGE_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_ATOMIC_UMIN class methods ---
+
+ Inst_MIMG__IMAGE_ATOMIC_UMIN::Inst_MIMG__IMAGE_ATOMIC_UMIN(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_atomic_umin")
+ {
+ setFlag(AtomicMin);
+ if (instData.GLC) {
+ setFlag(AtomicReturn);
+ } else {
+ setFlag(AtomicNoReturn);
+ }
+ setFlag(MemoryRef);
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_ATOMIC_UMIN
+
+ Inst_MIMG__IMAGE_ATOMIC_UMIN::~Inst_MIMG__IMAGE_ATOMIC_UMIN()
+ {
+ } // ~Inst_MIMG__IMAGE_ATOMIC_UMIN
+
+ // --- description from .arch file ---
+ // 32b:
+ // tmp = MEM[ADDR];
+ // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
+ // RETURN_DATA = tmp.
+ void
+ Inst_MIMG__IMAGE_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_ATOMIC_SMAX class methods ---
+
+ Inst_MIMG__IMAGE_ATOMIC_SMAX::Inst_MIMG__IMAGE_ATOMIC_SMAX(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_atomic_smax")
+ {
+ setFlag(AtomicMax);
+ if (instData.GLC) {
+ setFlag(AtomicReturn);
+ } else {
+ setFlag(AtomicNoReturn);
+ }
+ setFlag(MemoryRef);
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_ATOMIC_SMAX
+
+ Inst_MIMG__IMAGE_ATOMIC_SMAX::~Inst_MIMG__IMAGE_ATOMIC_SMAX()
+ {
+ } // ~Inst_MIMG__IMAGE_ATOMIC_SMAX
+
+ // --- description from .arch file ---
+ // 32b:
+ // tmp = MEM[ADDR];
+ // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
+ // RETURN_DATA = tmp.
+ void
+ Inst_MIMG__IMAGE_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_ATOMIC_UMAX class methods ---
+
+ Inst_MIMG__IMAGE_ATOMIC_UMAX::Inst_MIMG__IMAGE_ATOMIC_UMAX(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_atomic_umax")
+ {
+ setFlag(AtomicMax);
+ if (instData.GLC) {
+ setFlag(AtomicReturn);
+ } else {
+ setFlag(AtomicNoReturn);
+ }
+ setFlag(MemoryRef);
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_ATOMIC_UMAX
+
+ Inst_MIMG__IMAGE_ATOMIC_UMAX::~Inst_MIMG__IMAGE_ATOMIC_UMAX()
+ {
+ } // ~Inst_MIMG__IMAGE_ATOMIC_UMAX
+
+ // --- description from .arch file ---
+ // 32b:
+ // tmp = MEM[ADDR];
+ // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
+ // RETURN_DATA = tmp.
+ void
+ Inst_MIMG__IMAGE_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // The bitwise and wrapping inc/dec image atomics below all follow the
+ // same decode pattern: op-specific atomic flag, GLC-selected
+ // AtomicReturn/AtomicNoReturn, then MemoryRef and GlobalSegment.
+ // execute() is an unimplemented stub for each.
+ // --- Inst_MIMG__IMAGE_ATOMIC_AND class methods ---
+
+ Inst_MIMG__IMAGE_ATOMIC_AND::Inst_MIMG__IMAGE_ATOMIC_AND(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_atomic_and")
+ {
+ setFlag(AtomicAnd);
+ if (instData.GLC) {
+ setFlag(AtomicReturn);
+ } else {
+ setFlag(AtomicNoReturn);
+ }
+ setFlag(MemoryRef);
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_ATOMIC_AND
+
+ Inst_MIMG__IMAGE_ATOMIC_AND::~Inst_MIMG__IMAGE_ATOMIC_AND()
+ {
+ } // ~Inst_MIMG__IMAGE_ATOMIC_AND
+
+ // --- description from .arch file ---
+ // 32b:
+ // tmp = MEM[ADDR];
+ // MEM[ADDR] &= DATA;
+ // RETURN_DATA = tmp.
+ void
+ Inst_MIMG__IMAGE_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_ATOMIC_OR class methods ---
+
+ Inst_MIMG__IMAGE_ATOMIC_OR::Inst_MIMG__IMAGE_ATOMIC_OR(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_atomic_or")
+ {
+ setFlag(AtomicOr);
+ if (instData.GLC) {
+ setFlag(AtomicReturn);
+ } else {
+ setFlag(AtomicNoReturn);
+ }
+ setFlag(MemoryRef);
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_ATOMIC_OR
+
+ Inst_MIMG__IMAGE_ATOMIC_OR::~Inst_MIMG__IMAGE_ATOMIC_OR()
+ {
+ } // ~Inst_MIMG__IMAGE_ATOMIC_OR
+
+ // --- description from .arch file ---
+ // 32b:
+ // tmp = MEM[ADDR];
+ // MEM[ADDR] |= DATA;
+ // RETURN_DATA = tmp.
+ void
+ Inst_MIMG__IMAGE_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_ATOMIC_XOR class methods ---
+
+ Inst_MIMG__IMAGE_ATOMIC_XOR::Inst_MIMG__IMAGE_ATOMIC_XOR(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_atomic_xor")
+ {
+ setFlag(AtomicXor);
+ if (instData.GLC) {
+ setFlag(AtomicReturn);
+ } else {
+ setFlag(AtomicNoReturn);
+ }
+ setFlag(MemoryRef);
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_ATOMIC_XOR
+
+ Inst_MIMG__IMAGE_ATOMIC_XOR::~Inst_MIMG__IMAGE_ATOMIC_XOR()
+ {
+ } // ~Inst_MIMG__IMAGE_ATOMIC_XOR
+
+ // --- description from .arch file ---
+ // 32b:
+ // tmp = MEM[ADDR];
+ // MEM[ADDR] ^= DATA;
+ // RETURN_DATA = tmp.
+ void
+ Inst_MIMG__IMAGE_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_ATOMIC_INC class methods ---
+
+ Inst_MIMG__IMAGE_ATOMIC_INC::Inst_MIMG__IMAGE_ATOMIC_INC(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_atomic_inc")
+ {
+ setFlag(AtomicInc);
+ if (instData.GLC) {
+ setFlag(AtomicReturn);
+ } else {
+ setFlag(AtomicNoReturn);
+ }
+ setFlag(MemoryRef);
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_ATOMIC_INC
+
+ Inst_MIMG__IMAGE_ATOMIC_INC::~Inst_MIMG__IMAGE_ATOMIC_INC()
+ {
+ } // ~Inst_MIMG__IMAGE_ATOMIC_INC
+
+ // --- description from .arch file ---
+ // 32b:
+ // tmp = MEM[ADDR];
+ // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
+ // RETURN_DATA = tmp.
+ void
+ Inst_MIMG__IMAGE_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_ATOMIC_DEC class methods ---
+
+ Inst_MIMG__IMAGE_ATOMIC_DEC::Inst_MIMG__IMAGE_ATOMIC_DEC(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_atomic_dec")
+ {
+ setFlag(AtomicDec);
+ // GLC=1: return the pre-op value; GLC=0: no return value.
+ if (instData.GLC) {
+ setFlag(AtomicReturn);
+ } else {
+ setFlag(AtomicNoReturn);
+ }
+ setFlag(MemoryRef);
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_ATOMIC_DEC
+
+ Inst_MIMG__IMAGE_ATOMIC_DEC::~Inst_MIMG__IMAGE_ATOMIC_DEC()
+ {
+ } // ~Inst_MIMG__IMAGE_ATOMIC_DEC
+
+ // --- description from .arch file ---
+ // 32b:
+ // tmp = MEM[ADDR];
+ // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
+ // (unsigned compare); RETURN_DATA = tmp.
+ void
+ Inst_MIMG__IMAGE_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE::Inst_MIMG__IMAGE_SAMPLE(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample")
+ {
+ // Fix: this was the only IMAGE_SAMPLE*/IMAGE_GATHER4* constructor in
+ // this file that did not tag the access as a global-segment reference;
+ // set it here for consistency with every other sample/gather variant.
+ // Inert for now since execute() is an unimplemented stub.
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE
+
+ Inst_MIMG__IMAGE_SAMPLE::~Inst_MIMG__IMAGE_SAMPLE()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE
+
+ // --- description from .arch file ---
+ // sample texture map.
+ void
+ Inst_MIMG__IMAGE_SAMPLE::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_CL class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_CL::Inst_MIMG__IMAGE_SAMPLE_CL(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_cl")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_CL
+
+ Inst_MIMG__IMAGE_SAMPLE_CL::~Inst_MIMG__IMAGE_SAMPLE_CL()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_CL
+
+ // --- description from .arch file ---
+ // sample texture map, with LOD clamp specified in shader.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_CL::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // IMAGE_SAMPLE_* variants: each constructor only tags the instruction as
+ // a global-segment access and each execute() is an unimplemented stub.
+ // Per the arch-file descriptions below, the suffixes mean: D = user
+ // derivatives, CL = LOD clamp from shader, L = user LOD, B = lod bias,
+ // LZ = sample from level 0, C = depth compare (PCF).
+ // --- Inst_MIMG__IMAGE_SAMPLE_D class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_D::Inst_MIMG__IMAGE_SAMPLE_D(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_d")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_D
+
+ Inst_MIMG__IMAGE_SAMPLE_D::~Inst_MIMG__IMAGE_SAMPLE_D()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_D
+
+ // --- description from .arch file ---
+ // sample texture map, with user derivatives
+ void
+ Inst_MIMG__IMAGE_SAMPLE_D::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_D_CL class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_D_CL::Inst_MIMG__IMAGE_SAMPLE_D_CL(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_d_cl")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_D_CL
+
+ Inst_MIMG__IMAGE_SAMPLE_D_CL::~Inst_MIMG__IMAGE_SAMPLE_D_CL()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL
+
+ // --- description from .arch file ---
+ // sample texture map, with LOD clamp specified in shader, with user
+ // --- derivatives.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_D_CL::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_L class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_L::Inst_MIMG__IMAGE_SAMPLE_L(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_l")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_L
+
+ Inst_MIMG__IMAGE_SAMPLE_L::~Inst_MIMG__IMAGE_SAMPLE_L()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_L
+
+ // --- description from .arch file ---
+ // sample texture map, with user LOD.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_L::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_B class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_B::Inst_MIMG__IMAGE_SAMPLE_B(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_b")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_B
+
+ Inst_MIMG__IMAGE_SAMPLE_B::~Inst_MIMG__IMAGE_SAMPLE_B()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_B
+
+ // --- description from .arch file ---
+ // sample texture map, with lod bias.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_B::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_B_CL class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_B_CL::Inst_MIMG__IMAGE_SAMPLE_B_CL(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_b_cl")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_B_CL
+
+ Inst_MIMG__IMAGE_SAMPLE_B_CL::~Inst_MIMG__IMAGE_SAMPLE_B_CL()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL
+
+ // --- description from .arch file ---
+ // sample texture map, with LOD clamp specified in shader, with lod bias.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_B_CL::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_LZ class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_LZ::Inst_MIMG__IMAGE_SAMPLE_LZ(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_lz")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_LZ
+
+ Inst_MIMG__IMAGE_SAMPLE_LZ::~Inst_MIMG__IMAGE_SAMPLE_LZ()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_LZ
+
+ // --- description from .arch file ---
+ // sample texture map, from level 0.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_LZ::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C::Inst_MIMG__IMAGE_SAMPLE_C(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C
+
+ Inst_MIMG__IMAGE_SAMPLE_C::~Inst_MIMG__IMAGE_SAMPLE_C()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C
+
+ // --- description from .arch file ---
+ // sample texture map, with PCF.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C_CL class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C_CL::Inst_MIMG__IMAGE_SAMPLE_C_CL(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c_cl")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C_CL
+
+ Inst_MIMG__IMAGE_SAMPLE_C_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CL()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL
+
+ // --- description from .arch file ---
+ // SAMPLE_C, with LOD clamp specified in shader.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C_CL::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C_D class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C_D::Inst_MIMG__IMAGE_SAMPLE_C_D(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c_d")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C_D
+
+ Inst_MIMG__IMAGE_SAMPLE_C_D::~Inst_MIMG__IMAGE_SAMPLE_C_D()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C_D
+
+ // --- description from .arch file ---
+ // SAMPLE_C, with user derivatives.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C_D::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C_D_CL class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C_D_CL::Inst_MIMG__IMAGE_SAMPLE_C_D_CL(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c_d_cl")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL
+
+ Inst_MIMG__IMAGE_SAMPLE_C_D_CL::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL
+
+ // --- description from .arch file ---
+ // SAMPLE_C, with LOD clamp specified in shader, with user derivatives.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C_D_CL::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C_L class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C_L::Inst_MIMG__IMAGE_SAMPLE_C_L(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c_l")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C_L
+
+ Inst_MIMG__IMAGE_SAMPLE_C_L::~Inst_MIMG__IMAGE_SAMPLE_C_L()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C_L
+
+ // --- description from .arch file ---
+ // SAMPLE_C, with user LOD.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C_L::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C_B class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C_B::Inst_MIMG__IMAGE_SAMPLE_C_B(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c_b")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C_B
+
+ Inst_MIMG__IMAGE_SAMPLE_C_B::~Inst_MIMG__IMAGE_SAMPLE_C_B()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C_B
+
+ // --- description from .arch file ---
+ // SAMPLE_C, with lod bias.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C_B::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C_B_CL class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C_B_CL::Inst_MIMG__IMAGE_SAMPLE_C_B_CL(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c_b_cl")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL
+
+ Inst_MIMG__IMAGE_SAMPLE_C_B_CL::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL
+
+ // --- description from .arch file ---
+ // SAMPLE_C, with LOD clamp specified in shader, with lod bias.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C_LZ class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C_LZ::Inst_MIMG__IMAGE_SAMPLE_C_LZ(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c_lz")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C_LZ
+
+ Inst_MIMG__IMAGE_SAMPLE_C_LZ::~Inst_MIMG__IMAGE_SAMPLE_C_LZ()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ
+
+ // --- description from .arch file ---
+ // SAMPLE_C, from level 0.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C_LZ::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_O::Inst_MIMG__IMAGE_SAMPLE_O(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_O
+
+ Inst_MIMG__IMAGE_SAMPLE_O::~Inst_MIMG__IMAGE_SAMPLE_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_O
+
+ // --- description from .arch file ---
+ // sample texture map, with user offsets.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // IMAGE_SAMPLE_*_O variants: the trailing O adds user offsets to the
+ // corresponding non-O sample variant (see descriptions below). Same
+ // decode pattern as above: GlobalSegment tag only, execute() stubbed.
+ // --- Inst_MIMG__IMAGE_SAMPLE_CL_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_CL_O::Inst_MIMG__IMAGE_SAMPLE_CL_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_cl_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_CL_O
+
+ Inst_MIMG__IMAGE_SAMPLE_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CL_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_CL_O
+
+ // --- description from .arch file ---
+ // SAMPLE_O with LOD clamp specified in shader.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_CL_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_D_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_D_O::Inst_MIMG__IMAGE_SAMPLE_D_O(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_d_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_D_O
+
+ Inst_MIMG__IMAGE_SAMPLE_D_O::~Inst_MIMG__IMAGE_SAMPLE_D_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_D_O
+
+ // --- description from .arch file ---
+ // SAMPLE_O, with user derivatives.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_D_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_D_CL_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_D_CL_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_d_cl_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_D_CL_O
+
+ Inst_MIMG__IMAGE_SAMPLE_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_D_CL_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL_O
+
+ // --- description from .arch file ---
+ // SAMPLE_O, with LOD clamp specified in shader, with user derivatives.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_L_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_L_O::Inst_MIMG__IMAGE_SAMPLE_L_O(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_l_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_L_O
+
+ Inst_MIMG__IMAGE_SAMPLE_L_O::~Inst_MIMG__IMAGE_SAMPLE_L_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_L_O
+
+ // --- description from .arch file ---
+ // SAMPLE_O, with user LOD.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_L_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_B_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_B_O::Inst_MIMG__IMAGE_SAMPLE_B_O(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_b_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_B_O
+
+ Inst_MIMG__IMAGE_SAMPLE_B_O::~Inst_MIMG__IMAGE_SAMPLE_B_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_B_O
+
+ // --- description from .arch file ---
+ // SAMPLE_O, with lod bias.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_B_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_B_CL_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_B_CL_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_b_cl_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_B_CL_O
+
+ Inst_MIMG__IMAGE_SAMPLE_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_B_CL_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL_O
+
+ // --- description from .arch file ---
+ // SAMPLE_O, with LOD clamp specified in shader, with lod bias.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_LZ_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_LZ_O::Inst_MIMG__IMAGE_SAMPLE_LZ_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_lz_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_LZ_O
+
+ Inst_MIMG__IMAGE_SAMPLE_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_LZ_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_LZ_O
+
+ // --- description from .arch file ---
+ // SAMPLE_O, from level 0.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_LZ_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C_O::Inst_MIMG__IMAGE_SAMPLE_C_O(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C_O
+
+ Inst_MIMG__IMAGE_SAMPLE_C_O::~Inst_MIMG__IMAGE_SAMPLE_C_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C_O
+
+ // --- description from .arch file ---
+ // SAMPLE_C with user specified offsets.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C_CL_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CL_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c_cl_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C_CL_O
+
+ Inst_MIMG__IMAGE_SAMPLE_C_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CL_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL_O
+
+ // --- description from .arch file ---
+ // SAMPLE_C_O, with LOD clamp specified in shader.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C_D_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C_D_O::Inst_MIMG__IMAGE_SAMPLE_C_D_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c_d_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C_D_O
+
+ Inst_MIMG__IMAGE_SAMPLE_C_D_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_O
+
+ // --- description from .arch file ---
+ // SAMPLE_C_O, with user derivatives.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C_D_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c_d_cl_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O
+
+ Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O
+
+ // --- description from .arch file ---
+ // SAMPLE_C_O, with LOD clamp specified in shader, with user derivatives.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C_L_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C_L_O::Inst_MIMG__IMAGE_SAMPLE_C_L_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c_l_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C_L_O
+
+ Inst_MIMG__IMAGE_SAMPLE_C_L_O::~Inst_MIMG__IMAGE_SAMPLE_C_L_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C_L_O
+
+ // --- description from .arch file ---
+ // SAMPLE_C_O, with user LOD.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C_L_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C_B_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C_B_O::Inst_MIMG__IMAGE_SAMPLE_C_B_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c_b_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C_B_O
+
+ Inst_MIMG__IMAGE_SAMPLE_C_B_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_O
+
+ // --- description from .arch file ---
+ // SAMPLE_C_O, with lod bias.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C_B_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c_b_cl_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O
+
+ Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O
+
+ // --- description from .arch file ---
+ // SAMPLE_C_O, with LOD clamp specified in shader, with lod bias.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_SAMPLE_C_LZ_O class methods ---
+
+ Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::Inst_MIMG__IMAGE_SAMPLE_C_LZ_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_sample_c_lz_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_SAMPLE_C_LZ_O
+
+ Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O()
+ {
+ } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O
+
+ // --- description from .arch file ---
+ // SAMPLE_C_O, from level 0.
+ void
+ Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // IMAGE_GATHER4* family: gather four single-component elements (2x2),
+ // with the same suffix scheme as the sample variants (CL/L/B/LZ/C/O).
+ // Constructors tag GlobalSegment only; every execute() is a stub.
+ // --- Inst_MIMG__IMAGE_GATHER4 class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4::Inst_MIMG__IMAGE_GATHER4(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4
+
+ Inst_MIMG__IMAGE_GATHER4::~Inst_MIMG__IMAGE_GATHER4()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4
+
+ // --- description from .arch file ---
+ // gather 4 single component elements (2x2).
+ void
+ Inst_MIMG__IMAGE_GATHER4::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_CL class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_CL::Inst_MIMG__IMAGE_GATHER4_CL(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_cl")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_CL
+
+ Inst_MIMG__IMAGE_GATHER4_CL::~Inst_MIMG__IMAGE_GATHER4_CL()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_CL
+
+ // --- description from .arch file ---
+ // gather 4 single component elements (2x2) with user LOD clamp.
+ void
+ Inst_MIMG__IMAGE_GATHER4_CL::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_L class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_L::Inst_MIMG__IMAGE_GATHER4_L(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_l")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_L
+
+ Inst_MIMG__IMAGE_GATHER4_L::~Inst_MIMG__IMAGE_GATHER4_L()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_L
+
+ // --- description from .arch file ---
+ // gather 4 single component elements (2x2) with user LOD.
+ void
+ Inst_MIMG__IMAGE_GATHER4_L::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_B class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_B::Inst_MIMG__IMAGE_GATHER4_B(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_b")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_B
+
+ Inst_MIMG__IMAGE_GATHER4_B::~Inst_MIMG__IMAGE_GATHER4_B()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_B
+
+ // --- description from .arch file ---
+ // gather 4 single component elements (2x2) with user bias.
+ void
+ Inst_MIMG__IMAGE_GATHER4_B::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_B_CL class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_B_CL::Inst_MIMG__IMAGE_GATHER4_B_CL(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_b_cl")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_B_CL
+
+ Inst_MIMG__IMAGE_GATHER4_B_CL::~Inst_MIMG__IMAGE_GATHER4_B_CL()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_B_CL
+
+ // --- description from .arch file ---
+ // gather 4 single component elements (2x2) with user bias and clamp.
+ void
+ Inst_MIMG__IMAGE_GATHER4_B_CL::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_LZ class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_LZ::Inst_MIMG__IMAGE_GATHER4_LZ(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_lz")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_LZ
+
+ Inst_MIMG__IMAGE_GATHER4_LZ::~Inst_MIMG__IMAGE_GATHER4_LZ()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_LZ
+
+ // --- description from .arch file ---
+ // gather 4 single component elements (2x2) at level 0.
+ void
+ Inst_MIMG__IMAGE_GATHER4_LZ::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_C class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_C::Inst_MIMG__IMAGE_GATHER4_C(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_c")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_C
+
+ Inst_MIMG__IMAGE_GATHER4_C::~Inst_MIMG__IMAGE_GATHER4_C()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_C
+
+ // --- description from .arch file ---
+ // gather 4 single component elements (2x2) with PCF.
+ void
+ Inst_MIMG__IMAGE_GATHER4_C::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_C_CL class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_C_CL::Inst_MIMG__IMAGE_GATHER4_C_CL(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_c_cl")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_C_CL
+
+ Inst_MIMG__IMAGE_GATHER4_C_CL::~Inst_MIMG__IMAGE_GATHER4_C_CL()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_C_CL
+
+ // --- description from .arch file ---
+ // gather 4 single component elements (2x2) with user LOD clamp and PCF.
+ void
+ Inst_MIMG__IMAGE_GATHER4_C_CL::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_C_L class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_C_L::Inst_MIMG__IMAGE_GATHER4_C_L(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_c_l")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_C_L
+
+ Inst_MIMG__IMAGE_GATHER4_C_L::~Inst_MIMG__IMAGE_GATHER4_C_L()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_C_L
+
+ // --- description from .arch file ---
+ // gather 4 single component elements (2x2) with user LOD and PCF.
+ void
+ Inst_MIMG__IMAGE_GATHER4_C_L::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_C_B class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_C_B::Inst_MIMG__IMAGE_GATHER4_C_B(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_c_b")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_C_B
+
+ Inst_MIMG__IMAGE_GATHER4_C_B::~Inst_MIMG__IMAGE_GATHER4_C_B()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_C_B
+
+ // --- description from .arch file ---
+ // gather 4 single component elements (2x2) with user bias and PCF.
+ void
+ Inst_MIMG__IMAGE_GATHER4_C_B::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_C_B_CL class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_C_B_CL::Inst_MIMG__IMAGE_GATHER4_C_B_CL(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_c_b_cl")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_C_B_CL
+
+ Inst_MIMG__IMAGE_GATHER4_C_B_CL::~Inst_MIMG__IMAGE_GATHER4_C_B_CL()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL
+
+ // --- description from .arch file ---
+ // gather 4 single component elements (2x2) with user bias, clamp and PCF.
+ void
+ Inst_MIMG__IMAGE_GATHER4_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_C_LZ class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_C_LZ::Inst_MIMG__IMAGE_GATHER4_C_LZ(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_c_lz")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_C_LZ
+
+ Inst_MIMG__IMAGE_GATHER4_C_LZ::~Inst_MIMG__IMAGE_GATHER4_C_LZ()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ
+
+ // --- description from .arch file ---
+ // gather 4 single component elements (2x2) at level 0, with PCF.
+ void
+ Inst_MIMG__IMAGE_GATHER4_C_LZ::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_O class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_O::Inst_MIMG__IMAGE_GATHER4_O(InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_O
+
+ Inst_MIMG__IMAGE_GATHER4_O::~Inst_MIMG__IMAGE_GATHER4_O()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_O
+
+ // --- description from .arch file ---
+ // GATHER4, with user offsets.
+ void
+ Inst_MIMG__IMAGE_GATHER4_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_CL_O class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_CL_O::Inst_MIMG__IMAGE_GATHER4_CL_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_cl_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_CL_O
+
+ Inst_MIMG__IMAGE_GATHER4_CL_O::~Inst_MIMG__IMAGE_GATHER4_CL_O()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_CL_O
+
+ // --- description from .arch file ---
+ // GATHER4_CL, with user offsets.
+ void
+ Inst_MIMG__IMAGE_GATHER4_CL_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_L_O class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_L_O::Inst_MIMG__IMAGE_GATHER4_L_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_l_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_L_O
+
+ Inst_MIMG__IMAGE_GATHER4_L_O::~Inst_MIMG__IMAGE_GATHER4_L_O()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_L_O
+
+ // --- description from .arch file ---
+ // GATHER4_L, with user offsets.
+ void
+ Inst_MIMG__IMAGE_GATHER4_L_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_B_O class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_B_O::Inst_MIMG__IMAGE_GATHER4_B_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_b_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_B_O
+
+ Inst_MIMG__IMAGE_GATHER4_B_O::~Inst_MIMG__IMAGE_GATHER4_B_O()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_B_O
+
+ // --- description from .arch file ---
+ // GATHER4_B, with user offsets.
+ void
+ Inst_MIMG__IMAGE_GATHER4_B_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_B_CL_O class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_B_CL_O::Inst_MIMG__IMAGE_GATHER4_B_CL_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_b_cl_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_B_CL_O
+
+ Inst_MIMG__IMAGE_GATHER4_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_B_CL_O()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_B_CL_O
+
+ // --- description from .arch file ---
+ // GATHER4_B_CL, with user offsets.
+ void
+ Inst_MIMG__IMAGE_GATHER4_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_LZ_O class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_LZ_O::Inst_MIMG__IMAGE_GATHER4_LZ_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_lz_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_LZ_O
+
+ Inst_MIMG__IMAGE_GATHER4_LZ_O::~Inst_MIMG__IMAGE_GATHER4_LZ_O()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_LZ_O
+
+ // --- description from .arch file ---
+ // GATHER4_LZ, with user offsets.
+ void
+ Inst_MIMG__IMAGE_GATHER4_LZ_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_C_O class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_C_O::Inst_MIMG__IMAGE_GATHER4_C_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_c_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_C_O
+
+ Inst_MIMG__IMAGE_GATHER4_C_O::~Inst_MIMG__IMAGE_GATHER4_C_O()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_C_O
+
+ // --- description from .arch file ---
+ // GATHER4_C, with user offsets.
+ void
+ Inst_MIMG__IMAGE_GATHER4_C_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_C_CL_O class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_C_CL_O::Inst_MIMG__IMAGE_GATHER4_C_CL_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_c_cl_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_C_CL_O
+
+ Inst_MIMG__IMAGE_GATHER4_C_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_CL_O()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_C_CL_O
+
+ // --- description from .arch file ---
+ // GATHER4_C_CL, with user offsets.
+ void
+ Inst_MIMG__IMAGE_GATHER4_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_MIMG__IMAGE_GATHER4_C_L_O class methods ---
+
+ Inst_MIMG__IMAGE_GATHER4_C_L_O::Inst_MIMG__IMAGE_GATHER4_C_L_O(
+ InFmt_MIMG *iFmt)
+ : Inst_MIMG(iFmt, "image_gather4_c_l_o")
+ {
+ setFlag(GlobalSegment);
+ } // Inst_MIMG__IMAGE_GATHER4_C_L_O
+
+ Inst_MIMG__IMAGE_GATHER4_C_L_O::~Inst_MIMG__IMAGE_GATHER4_C_L_O()
+ {
+ } // ~Inst_MIMG__IMAGE_GATHER4_C_L_O
+
+ // --- description from .arch file ---
+ // GATHER4_C_L, with user offsets.
+ void + Inst_MIMG__IMAGE_GATHER4_C_L_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_C_B_O class methods --- + + Inst_MIMG__IMAGE_GATHER4_C_B_O::Inst_MIMG__IMAGE_GATHER4_C_B_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_c_b_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_C_B_O + + Inst_MIMG__IMAGE_GATHER4_C_B_O::~Inst_MIMG__IMAGE_GATHER4_C_B_O() + { + } // ~Inst_MIMG__IMAGE_GATHER4_C_B_O + + // --- description from .arch file --- + // GATHER4_B, with user offsets. + void + Inst_MIMG__IMAGE_GATHER4_C_B_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_C_B_CL_O class methods --- + + Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::Inst_MIMG__IMAGE_GATHER4_C_B_CL_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_c_b_cl_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_C_B_CL_O + + Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O() + { + } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O + + // --- description from .arch file --- + // GATHER4_B_CL, with user offsets. + void + Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GATHER4_C_LZ_O class methods --- + + Inst_MIMG__IMAGE_GATHER4_C_LZ_O::Inst_MIMG__IMAGE_GATHER4_C_LZ_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_gather4_c_lz_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GATHER4_C_LZ_O + + Inst_MIMG__IMAGE_GATHER4_C_LZ_O::~Inst_MIMG__IMAGE_GATHER4_C_LZ_O() + { + } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ_O + + // --- description from .arch file --- + // GATHER4_C_LZ, with user offsets. 
+ void + Inst_MIMG__IMAGE_GATHER4_C_LZ_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_GET_LOD class methods --- + + Inst_MIMG__IMAGE_GET_LOD::Inst_MIMG__IMAGE_GET_LOD(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_get_lod") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_GET_LOD + + Inst_MIMG__IMAGE_GET_LOD::~Inst_MIMG__IMAGE_GET_LOD() + { + } // ~Inst_MIMG__IMAGE_GET_LOD + + // --- description from .arch file --- + // Return calculated LOD. Vdata gets 2 32bit integer values: { rawLOD, + // --- clampedLOD }. + void + Inst_MIMG__IMAGE_GET_LOD::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_CD class methods --- + + Inst_MIMG__IMAGE_SAMPLE_CD::Inst_MIMG__IMAGE_SAMPLE_CD(InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_cd") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_CD + + Inst_MIMG__IMAGE_SAMPLE_CD::~Inst_MIMG__IMAGE_SAMPLE_CD() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_CD + + // --- description from .arch file --- + // sample texture map, with user derivatives (LOD per quad) + void + Inst_MIMG__IMAGE_SAMPLE_CD::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_CD_CL class methods --- + + Inst_MIMG__IMAGE_SAMPLE_CD_CL::Inst_MIMG__IMAGE_SAMPLE_CD_CL( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_cd_cl") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_CD_CL + + Inst_MIMG__IMAGE_SAMPLE_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_CD_CL() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL + + // --- description from .arch file --- + // sample texture map, with LOD clamp specified in shader, with user + // --- derivatives (LOD per quad). 
+ void + Inst_MIMG__IMAGE_SAMPLE_CD_CL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_CD class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_CD::Inst_MIMG__IMAGE_SAMPLE_C_CD( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_cd") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_CD + + Inst_MIMG__IMAGE_SAMPLE_C_CD::~Inst_MIMG__IMAGE_SAMPLE_C_CD() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD + + // --- description from .arch file --- + // SAMPLE_C, with user derivatives (LOD per quad). + void + Inst_MIMG__IMAGE_SAMPLE_C_CD::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_CD_CL class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_cd_cl") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL + + Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL + + // --- description from .arch file --- + // SAMPLE_C, with LOD clamp specified in shader, with user derivatives + // (LOD per quad). + void + Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_CD_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_CD_O::Inst_MIMG__IMAGE_SAMPLE_CD_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_cd_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_CD_O + + Inst_MIMG__IMAGE_SAMPLE_CD_O::~Inst_MIMG__IMAGE_SAMPLE_CD_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_CD_O + + // --- description from .arch file --- + // SAMPLE_O, with user derivatives (LOD per quad). 
+ void + Inst_MIMG__IMAGE_SAMPLE_CD_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_CD_CL_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_CD_CL_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_cd_cl_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_CD_CL_O + + Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O + + // --- description from .arch file --- + // SAMPLE_O, with LOD clamp specified in shader, with user derivatives + // (LOD per quad). + void + Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_CD_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_CD_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_cd_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_CD_O + + Inst_MIMG__IMAGE_SAMPLE_C_CD_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_O + + // --- description from .arch file --- + // SAMPLE_C_O, with user derivatives (LOD per quad). + void + Inst_MIMG__IMAGE_SAMPLE_C_CD_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O class methods --- + + Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O( + InFmt_MIMG *iFmt) + : Inst_MIMG(iFmt, "image_sample_c_cd_cl_o") + { + setFlag(GlobalSegment); + } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O + + Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O() + { + } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O + + // --- description from .arch file --- + // SAMPLE_C_O, with LOD clamp specified in shader, with user derivatives + // (LOD per quad). 
+ void + Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/mtbuf.cc b/src/arch/amdgpu/vega/insts/mtbuf.cc new file mode 100644 index 0000000000..2b37dfd6b9 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/mtbuf.cc @@ -0,0 +1,584 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "arch/amdgpu/vega/insts/instructions.hh"
+
+namespace gem5
+{
+
+namespace VegaISA
+{
+    // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_X class methods ---
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X
+        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_X(InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_load_format_x")
+    {
+        setFlag(MemoryRef);
+        setFlag(Load);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_X
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X()
+    {
+    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X
+
+    // --- description from .arch file ---
+    // Typed buffer load 1 dword with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY class methods ---
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY
+        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY(InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_load_format_xy")
+    {
+        setFlag(MemoryRef);
+        setFlag(Load);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY()
+    {
+    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY
+
+    // --- description from .arch file ---
+    // Typed buffer load 2 dwords with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ class methods ---
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ
+        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_load_format_xyz")
+    {
+        setFlag(MemoryRef);
+        setFlag(Load);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ()
+    {
+    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ
+
+    // --- description from .arch file ---
+    // Typed buffer load 3 dwords with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW class methods ---
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
+        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_load_format_xyzw")
+    {
+        setFlag(MemoryRef);
+        setFlag(Load);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
+        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW()
+    {
+    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
+
+    // --- description from .arch file ---
+    // Typed buffer load 4 dwords with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_X class methods ---
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_X
+        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_X(InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_store_format_x")
+    {
+        setFlag(MemoryRef);
+        setFlag(Store);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_X
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::~Inst_MTBUF__TBUFFER_STORE_FORMAT_X()
+    {
+    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_X
+
+    // --- description from .arch file ---
+    // Typed buffer store 1 dword with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_XY class methods ---
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY
+        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XY(InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_store_format_xy")
+    {
+        setFlag(MemoryRef);
+        setFlag(Store);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XY
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY()
+    {
+    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY
+
+    // --- description from .arch file ---
+    // Typed buffer store 2 dwords with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ class methods ---
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
+        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ(InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_store_format_xyz")
+    {
+        setFlag(MemoryRef);
+        setFlag(Store);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
+        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ()
+    {
+    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
+
+    // --- description from .arch file ---
+    // Typed buffer store 3 dwords with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW class methods ---
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
+        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW(InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_store_format_xyzw")
+    {
+        setFlag(MemoryRef);
+        setFlag(Store);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
+        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW()
+    {
+    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
+
+    // --- description from .arch file ---
+    // Typed buffer store 4 dwords with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::initiateAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::completeAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X class methods ---
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X
+        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_x")
+    {
+        setFlag(MemoryRef);
+        setFlag(Load);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::
+        ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X()
+    {
+    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X
+
+    // --- description from .arch file ---
+    // Typed buffer load 1 dword with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::initiateAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::completeAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY class methods ---
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
+        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xy")
+    {
+        setFlag(MemoryRef);
+        setFlag(Load);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
+        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY()
+    {
+    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
+
+    // --- description from .arch file ---
+    // Typed buffer load 2 dwords with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::completeAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ class methods ---
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
+        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ(
+          InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyz")
+    {
+        setFlag(MemoryRef);
+        setFlag(Load);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
+        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ()
+    {
+    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
+
+    // --- description from .arch file ---
+    // Typed buffer load 3 dwords with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW class methods ---
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
+        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW(
+          InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyzw")
+    {
+        setFlag(MemoryRef);
+        setFlag(Load);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
+
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
+        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW()
+    {
+    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
+
+    // --- description from .arch file ---
+    // Typed buffer load 4 dwords with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X class methods ---
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
+        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X(InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_x")
+    {
+        setFlag(MemoryRef);
+        setFlag(Store);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
+        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X()
+    {
+    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
+
+    // --- description from .arch file ---
+    // Typed buffer store 1 dword with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::initiateAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::completeAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY class methods ---
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
+        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xy")
+    {
+        setFlag(MemoryRef);
+        setFlag(Store);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
+        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY()
+    {
+    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
+
+    // --- description from .arch file ---
+    // Typed buffer store 2 dwords with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::initiateAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::completeAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ class methods ---
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
+        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyz")
+    {
+        setFlag(MemoryRef);
+        setFlag(Store);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
+        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ()
+    {
+    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
+
+    // --- description from .arch file ---
+    // Typed buffer store 3 dwords with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW class methods ---
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
+        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(InFmt_MTBUF *iFmt)
+        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyzw")
+    {
+        setFlag(MemoryRef);
+        setFlag(Store);
+        setFlag(GlobalSegment);
+    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
+
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
+        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW()
+    {
+    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
+
+    // --- description from .arch file ---
+    // Typed buffer store 4 dwords with format conversion.
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::execute(
+        GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
+        GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+} // namespace VegaISA
+} // namespace gem5
diff --git a/src/arch/amdgpu/vega/insts/mubuf.cc b/src/arch/amdgpu/vega/insts/mubuf.cc
new file mode 100644
index 0000000000..ffc68e8c2b
--- /dev/null
+++ b/src/arch/amdgpu/vega/insts/mubuf.cc
@@ -0,0 +1,2996 @@
+/*
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/amdgpu/vega/insts/instructions.hh"
+
+namespace gem5
+{
+
+namespace VegaISA
+{
+    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_X class methods ---
+
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_X
+        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt)
+        : Inst_MUBUF(iFmt, "buffer_load_format_x")
+    {
+        setFlag(MemoryRef);
+        setFlag(Load);
+        setFlag(GlobalSegment);
+    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_X
+
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::~Inst_MUBUF__BUFFER_LOAD_FORMAT_X()
+    {
+    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X
+
+    // --- description from .arch file ---
+    // Untyped buffer load 1 dword with format conversion.
+    void
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XY class methods ---
+
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
+        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF *iFmt)
+        : Inst_MUBUF(iFmt, "buffer_load_format_xy")
+    {
+        setFlag(MemoryRef);
+        setFlag(Load);
+        setFlag(GlobalSegment);
+    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
+
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY()
+    {
+    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
+
+    // --- description from .arch file ---
+    // Untyped buffer load 2 dwords with format conversion.
+    void
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ class methods ---
+
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
+        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF *iFmt)
+        : Inst_MUBUF(iFmt, "buffer_load_format_xyz")
+    {
+        setFlag(MemoryRef);
+        setFlag(Load);
+        setFlag(GlobalSegment);
+    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
+
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ()
+    {
+    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
+
+    // --- description from .arch file ---
+    // Untyped buffer load 3 dwords with format conversion.
+    void
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW class methods ---
+
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
+        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF *iFmt)
+        : Inst_MUBUF(iFmt, "buffer_load_format_xyzw")
+    {
+        setFlag(MemoryRef);
+        setFlag(Load);
+        setFlag(GlobalSegment);
+    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
+
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW()
+    {
+    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
+
+    // --- description from .arch file ---
+    // Untyped buffer load 4 dwords with format conversion.
+    void
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+
+    void
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // initiateAcc
+
+    void
+    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
+    // --- Inst_MUBUF__BUFFER_STORE_FORMAT_X class methods ---
+
+    Inst_MUBUF__BUFFER_STORE_FORMAT_X
+        ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF *iFmt)
+        : Inst_MUBUF(iFmt, "buffer_store_format_x")
+    {
+        setFlag(MemoryRef);
+        setFlag(Store);
+        setFlag(GlobalSegment);
+    } // Inst_MUBUF__BUFFER_STORE_FORMAT_X
+
+    Inst_MUBUF__BUFFER_STORE_FORMAT_X::~Inst_MUBUF__BUFFER_STORE_FORMAT_X()
+    {
+    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_X
+
+    // --- description from .arch file ---
+    // Untyped buffer store 1 dword with format conversion.
+ void + Inst_MUBUF__BUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XY class methods --- + + Inst_MUBUF__BUFFER_STORE_FORMAT_XY + ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_format_xy") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_STORE_FORMAT_XY + + Inst_MUBUF__BUFFER_STORE_FORMAT_XY::~Inst_MUBUF__BUFFER_STORE_FORMAT_XY() + { + } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY + + // --- description from .arch file --- + // Untyped buffer store 2 dwords with format conversion. + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ class methods --- + + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ + ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_format_xyz") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ + + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ() + { + } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ + + // --- description from .arch file --- + // Untyped buffer store 3 dwords with format conversion. 
+ void + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW class methods --- + + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW + ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_format_xyzw") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW + + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW + ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW() + { + } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW + + // --- description from .arch file --- + // Untyped buffer store 4 dwords with format conversion. + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X class methods --- + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X + ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_format_d16_x") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X + ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X() + { + } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X + + // --- description from .arch file --- + // Untyped buffer load 1 dword with format conversion. 
+ void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY class methods --- + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY + ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_format_d16_xy") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY + ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY() + { + } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY + + // --- description from .arch file --- + // Untyped buffer load 2 dwords with format conversion. + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ class methods --- + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ + ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyz") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ + ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ() + { + } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ + + // --- description from .arch file --- + // Untyped buffer load 3 dwords with format conversion. 
+ void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW class methods --- + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW + ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyzw") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW + + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW + ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW() + { + } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW + + // --- description from .arch file --- + // Untyped buffer load 4 dwords with format conversion. + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X class methods --- + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X + ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_format_d16_x") + { + setFlag(Store); + } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X + ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X() + { + } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X + + // --- description from .arch file --- + // Untyped buffer store 1 dword with format conversion. 
+ void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY class methods --- + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY + ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_format_d16_xy") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY + ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY() + { + } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY + + // --- description from .arch file --- + // Untyped buffer store 2 dwords with format conversion. + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ class methods --- + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ + ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyz") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ + ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ() + { + } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ + + // --- description from .arch file --- + // Untyped buffer store 3 dwords with format conversion. 
+ void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW class methods --- + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW + ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyzw") + { + setFlag(MemoryRef); + setFlag(Store); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW + + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW + ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW() + { + } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW + + // --- description from .arch file --- + // Untyped buffer store 4 dwords with format conversion. + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::initiateAcc( + GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::completeAcc( + GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_UBYTE class methods --- + + Inst_MUBUF__BUFFER_LOAD_UBYTE + ::Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_ubyte") + { + setFlag(MemoryRef); + setFlag(Load); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_LOAD_UBYTE + + Inst_MUBUF__BUFFER_LOAD_UBYTE::~Inst_MUBUF__BUFFER_LOAD_UBYTE() + { + } // ~Inst_MUBUF__BUFFER_LOAD_UBYTE + + // --- description from .arch file --- + // Untyped buffer load unsigned byte (zero extend to VGPR destination). 
+ void + Inst_MUBUF__BUFFER_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + + rsrcDesc.read(); + offset.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDATA); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + if (!oobMask[lane]) { + vdst[lane] = (VecElemU32)((reinterpret_cast( + gpuDynInst->d_data))[lane]); + } else { + vdst[lane] = 0; + } + } + } + + vdst.write(); + } // execute + + // --- Inst_MUBUF__BUFFER_LOAD_SBYTE class methods --- + + Inst_MUBUF__BUFFER_LOAD_SBYTE + ::Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, 
"buffer_load_sbyte") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_SBYTE + + Inst_MUBUF__BUFFER_LOAD_SBYTE::~Inst_MUBUF__BUFFER_LOAD_SBYTE() + { + } // ~Inst_MUBUF__BUFFER_LOAD_SBYTE + + // --- description from .arch file --- + // Untyped buffer load signed byte (sign extend to VGPR destination). + void + Inst_MUBUF__BUFFER_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_USHORT class methods --- + + Inst_MUBUF__BUFFER_LOAD_USHORT + ::Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_ushort") + { + setFlag(MemoryRef); + setFlag(Load); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_LOAD_USHORT + + Inst_MUBUF__BUFFER_LOAD_USHORT::~Inst_MUBUF__BUFFER_LOAD_USHORT() + { + } // ~Inst_MUBUF__BUFFER_LOAD_USHORT + + // --- description from .arch file --- + // Untyped buffer load unsigned short (zero extend to VGPR destination). 
+ void + Inst_MUBUF__BUFFER_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + + rsrcDesc.read(); + offset.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDATA); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + if (!oobMask[lane]) { + vdst[lane] = (VecElemU32)((reinterpret_cast( + gpuDynInst->d_data))[lane]); + } else { + vdst[lane] = 0; + } + } + } + + vdst.write(); + } // execute + + // --- Inst_MUBUF__BUFFER_LOAD_SSHORT class methods --- + + Inst_MUBUF__BUFFER_LOAD_SSHORT + ::Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, 
"buffer_load_sshort") + { + setFlag(MemoryRef); + setFlag(Load); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_LOAD_SSHORT + + Inst_MUBUF__BUFFER_LOAD_SSHORT::~Inst_MUBUF__BUFFER_LOAD_SSHORT() + { + } // ~Inst_MUBUF__BUFFER_LOAD_SSHORT + + // --- description from .arch file --- + // Untyped buffer load signed short (sign extend to VGPR destination). + void + Inst_MUBUF__BUFFER_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst) + { + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_LOAD_SHORT_D16 class methods --- + + Inst_MUBUF__BUFFER_LOAD_SHORT_D16 + ::Inst_MUBUF__BUFFER_LOAD_SHORT_D16(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_short_d16") + { + setFlag(MemoryRef); + setFlag(Load); + if (instData.LDS) { + setFlag(GroupSegment); + warn("BUFFER.LDS not implemented!"); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_LOAD_SHORT_D16 + + Inst_MUBUF__BUFFER_LOAD_SHORT_D16::~Inst_MUBUF__BUFFER_LOAD_SHORT_D16() + { + } // ~Inst_MUBUF__BUFFER_LOAD_SHORT_D16 + + // --- description from .arch file --- + // RETURN_DATA[15 : 0].u16 = MEM[ADDR].u16; + // // RETURN_DATA[31:16] is preserved. 
+ void + Inst_MUBUF__BUFFER_LOAD_SHORT_D16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + + rsrcDesc.read(); + offset.read(); + + int inst_offset = instData.OFFSET; + + // For explanation of buffer addressing, see section 9.1.5 in: + // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/ + // instruction-set-architectures/ + // amd-instinct-mi300-cdna3-instruction-set-architecture.pdf + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_SHORT_D16::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_SHORT_D16::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDATA); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + if (!oobMask[lane]) { + VecElemU16 buf_val = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + 
replaceBits(vdst[lane], 15, 0, buf_val); + } else { + vdst[lane] = 0; + } + } + } + + vdst.write(); + } // completeAcc + // --- Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI class methods --- + + Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI + ::Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_short_d16_hi") + { + setFlag(MemoryRef); + setFlag(Load); + if (instData.LDS) { + setFlag(GroupSegment); + warn("BUFFER.LDS not implemented!"); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI + + Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI:: + ~Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI() + { + } // ~Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI + + // --- description from .arch file --- + // VDATA[31 : 16].b16 = MEM[ADDR].b16; + // // VDATA[15:0] is preserved. + void + Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + + rsrcDesc.read(); + offset.read(); + + int inst_offset = instData.OFFSET; + + // For explanation of buffer addressing, see section 9.1.5 in: + // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/ + // instruction-set-architectures/ + // amd-instinct-mi300-cdna3-instruction-set-architecture.pdf + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else 
if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_SHORT_D16_HI::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDATA); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + if (!oobMask[lane]) { + VecElemU16 buf_val = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + replaceBits(vdst[lane], 31, 16, buf_val); + } else { + vdst[lane] = 0; + } + } + } + + vdst.write(); + } // completeAcc + // --- Inst_MUBUF__BUFFER_LOAD_DWORD class methods --- + + Inst_MUBUF__BUFFER_LOAD_DWORD + ::Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_dword") + { + setFlag(MemoryRef); + setFlag(Load); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_LOAD_DWORD + + Inst_MUBUF__BUFFER_LOAD_DWORD::~Inst_MUBUF__BUFFER_LOAD_DWORD() + { + } // ~Inst_MUBUF__BUFFER_LOAD_DWORD + + // --- description from .arch file --- + // Untyped buffer load dword. 
+ void + Inst_MUBUF__BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + + rsrcDesc.read(); + offset.read(); + + int inst_offset = instData.OFFSET; + + // For explanation of buffer addressing, see section 9.1.5 in: + // https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/ + // instruction-set-architectures/ + // amd-instinct-mi300-cdna3-instruction-set-architecture.pdf + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst(gpuDynInst, extData.VDATA); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + if (!oobMask[lane]) { + vdst[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + } else { + 
vdst[lane] = 0; + } + } + } + + vdst.write(); + } // completeAcc + // --- Inst_MUBUF__BUFFER_LOAD_DWORDX2 class methods --- + + Inst_MUBUF__BUFFER_LOAD_DWORDX2 + ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_dwordx2") + { + setFlag(MemoryRef); + setFlag(Load); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_LOAD_DWORDX2 + + Inst_MUBUF__BUFFER_LOAD_DWORDX2::~Inst_MUBUF__BUFFER_LOAD_DWORDX2() + { + } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX2 + + // --- description from .arch file --- + // Untyped buffer load 2 dwords. + void + Inst_MUBUF__BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + + rsrcDesc.read(); + offset.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) + 
{ + initMemRead<2>(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDATA); + VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + if (!oobMask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 2 + 1]; + } else { + vdst0[lane] = 0; + vdst1[lane] = 0; + } + } + } + + vdst0.write(); + vdst1.write(); + } // completeAcc + // --- Inst_MUBUF__BUFFER_LOAD_DWORDX3 class methods --- + + Inst_MUBUF__BUFFER_LOAD_DWORDX3 + ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_dwordx3") + { + setFlag(MemoryRef); + setFlag(Load); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_LOAD_DWORDX3 + + Inst_MUBUF__BUFFER_LOAD_DWORDX3::~Inst_MUBUF__BUFFER_LOAD_DWORDX3() + { + } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX3 + + // --- description from .arch file --- + // Untyped buffer load 3 dwords. 
+ void + Inst_MUBUF__BUFFER_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + + rsrcDesc.read(); + offset.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_MUBUF__BUFFER_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<3>(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDATA); + VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); + VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + if (!oobMask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 3]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 3 + 1]; + vdst2[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 
3 + 2]; + } else { + vdst0[lane] = 0; + vdst1[lane] = 0; + vdst2[lane] = 0; + } + } + } + + vdst0.write(); + vdst1.write(); + vdst2.write(); + } // completeAcc + // --- Inst_MUBUF__BUFFER_LOAD_DWORDX4 class methods --- + + Inst_MUBUF__BUFFER_LOAD_DWORDX4 + ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_load_dwordx4") + { + setFlag(MemoryRef); + setFlag(Load); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_LOAD_DWORDX4 + + Inst_MUBUF__BUFFER_LOAD_DWORDX4::~Inst_MUBUF__BUFFER_LOAD_DWORDX4() + { + } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX4 + + // --- description from .arch file --- + // Untyped buffer load 4 dwords. + void + Inst_MUBUF__BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + + rsrcDesc.read(); + offset.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + } 
// execute + + void + Inst_MUBUF__BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<4>(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) + { + VecOperandU32 vdst0(gpuDynInst, extData.VDATA); + VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1); + VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2); + VecOperandU32 vdst3(gpuDynInst, extData.VDATA + 3); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + if (!oobMask[lane]) { + vdst0[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4]; + vdst1[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 1]; + vdst2[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 2]; + vdst3[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane * 4 + 3]; + } else { + vdst0[lane] = 0; + vdst1[lane] = 0; + vdst2[lane] = 0; + vdst3[lane] = 0; + } + } + } + + vdst0.write(); + vdst1.write(); + vdst2.write(); + vdst3.write(); + } // completeAcc + // --- Inst_MUBUF__BUFFER_STORE_BYTE class methods --- + + Inst_MUBUF__BUFFER_STORE_BYTE + ::Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_byte") + { + setFlag(MemoryRef); + setFlag(Store); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_STORE_BYTE + + Inst_MUBUF__BUFFER_STORE_BYTE::~Inst_MUBUF__BUFFER_STORE_BYTE() + { + } // ~Inst_MUBUF__BUFFER_STORE_BYTE + + // --- description from .arch file --- + // Untyped buffer store byte. 
+ void + Inst_MUBUF__BUFFER_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + ConstVecOperandI8 data(gpuDynInst, extData.VDATA); + + rsrcDesc.read(); + offset.read(); + data.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + } // execute + + void + Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_SHORT class methods --- + + Inst_MUBUF__BUFFER_STORE_SHORT + ::Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_short") + { + setFlag(MemoryRef); 
+ setFlag(Store); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_STORE_SHORT + + Inst_MUBUF__BUFFER_STORE_SHORT::~Inst_MUBUF__BUFFER_STORE_SHORT() + { + } // ~Inst_MUBUF__BUFFER_STORE_SHORT + + // --- description from .arch file --- + // Untyped buffer store short. + void + Inst_MUBUF__BUFFER_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + ConstVecOperandI16 data(gpuDynInst, extData.VDATA); + + rsrcDesc.read(); + offset.read(); + data.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + } // execute + + void + Inst_MUBUF__BUFFER_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst) + { + 
initMemWrite(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_MUBUF__BUFFER_STORE_DWORD class methods --- + + Inst_MUBUF__BUFFER_STORE_DWORD:: + Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_dword") + { + setFlag(MemoryRef); + setFlag(Store); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_STORE_DWORD + + Inst_MUBUF__BUFFER_STORE_DWORD::~Inst_MUBUF__BUFFER_STORE_DWORD() + { + } // ~Inst_MUBUF__BUFFER_STORE_DWORD + + // --- description from .arch file --- + // Untyped buffer store dword. + void + Inst_MUBUF__BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + ConstVecOperandU32 data(gpuDynInst, extData.VDATA); + + rsrcDesc.read(); + offset.read(); + data.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + 
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane] + = data[lane]; + } + } + } // execute + + void + Inst_MUBUF__BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_MUBUF__BUFFER_STORE_DWORDX2 class methods --- + + Inst_MUBUF__BUFFER_STORE_DWORDX2 + ::Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_dwordx2") + { + setFlag(MemoryRef); + setFlag(Store); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_STORE_DWORDX2 + + Inst_MUBUF__BUFFER_STORE_DWORDX2::~Inst_MUBUF__BUFFER_STORE_DWORDX2() + { + } // ~Inst_MUBUF__BUFFER_STORE_DWORDX2 + + // --- description from .arch file --- + // Untyped buffer store 2 dwords. 
+ void + Inst_MUBUF__BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); + ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); + + rsrcDesc.read(); + offset.read(); + data0.read(); + data1.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane * 4] + = data0[lane]; + (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 1] + = data1[lane]; + } + } + } // execute + + void + Inst_MUBUF__BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite<2>(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_MUBUF__BUFFER_STORE_DWORDX3 
class methods --- + + Inst_MUBUF__BUFFER_STORE_DWORDX3 + ::Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_dwordx3") + { + setFlag(MemoryRef); + setFlag(Store); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_STORE_DWORDX3 + + Inst_MUBUF__BUFFER_STORE_DWORDX3::~Inst_MUBUF__BUFFER_STORE_DWORDX3() + { + } // ~Inst_MUBUF__BUFFER_STORE_DWORDX3 + + // --- description from .arch file --- + // Untyped buffer store 3 dwords. + void + Inst_MUBUF__BUFFER_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); + ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); + ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2); + + rsrcDesc.read(); + offset.read(); + data0.read(); + data1.read(); + data2.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + 
gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane * 4] + = data0[lane]; + (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 1] + = data1[lane]; + (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 2] + = data2[lane]; + } + } + } // execute + + void + Inst_MUBUF__BUFFER_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite<3>(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_MUBUF__BUFFER_STORE_DWORDX4 class methods --- + + Inst_MUBUF__BUFFER_STORE_DWORDX4 + ::Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_store_dwordx4") + { + setFlag(MemoryRef); + setFlag(Store); + if (instData.LDS) { + setFlag(GroupSegment); + } else { + setFlag(GlobalSegment); + } + } // Inst_MUBUF__BUFFER_STORE_DWORDX4 + + Inst_MUBUF__BUFFER_STORE_DWORDX4::~Inst_MUBUF__BUFFER_STORE_DWORDX4() + { + } // ~Inst_MUBUF__BUFFER_STORE_DWORDX4 + + // --- description from .arch file --- + // Untyped buffer store 4 dwords. 
+ void + Inst_MUBUF__BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + wf->decExpInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + ConstVecOperandU32 data0(gpuDynInst, extData.VDATA); + ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1); + ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2); + ConstVecOperandU32 data3(gpuDynInst, extData.VDATA + 3); + + rsrcDesc.read(); + offset.read(); + data0.read(); + data1.read(); + data2.read(); + data3.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->d_data))[lane * 4] + = data0[lane]; + (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 1] + = data1[lane]; + (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 2] + = data2[lane]; + (reinterpret_cast(gpuDynInst->d_data))[lane*4 + 3] + = data3[lane]; + } + } + } // execute 
    void
    Inst_MUBUF__BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // Stores have nothing to write back to the register file.
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_STORE_LDS_DWORD class methods ---

    Inst_MUBUF__BUFFER_STORE_LDS_DWORD
        ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_lds_dword")
    {
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_LDS_DWORD

    Inst_MUBUF__BUFFER_STORE_LDS_DWORD::~Inst_MUBUF__BUFFER_STORE_LDS_DWORD()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD

    // --- description from .arch file ---
    // Store one DWORD from LDS memory to system memory without utilizing
    // VGPRs.
    void
    Inst_MUBUF__BUFFER_STORE_LDS_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        // Decode-only stub: not supported by the timing model.
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_WBINVL1 class methods ---

    Inst_MUBUF__BUFFER_WBINVL1::Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_wbinvl1")
    {
        setFlag(MemoryRef);
        // NOTE(review): MemSync is set twice here (once qualified, once
        // not) — presumably redundant; confirm both resolve to the same
        // GPUStaticInst flag before removing either.
        setFlag(GPUStaticInst::MemSync);
        setFlag(GlobalSegment);
        setFlag(MemSync);
    } // Inst_MUBUF__BUFFER_WBINVL1

    Inst_MUBUF__BUFFER_WBINVL1::~Inst_MUBUF__BUFFER_WBINVL1()
    {
    } // ~Inst_MUBUF__BUFFER_WBINVL1

    // --- description from .arch file ---
    // Write back and invalidate the shader L1.
    // Always returns ACK to shader.
    void
    Inst_MUBUF__BUFFER_WBINVL1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        // No active lanes: undo the issue-time bookkeeping and bail out.
        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
        } else {
            // NOTE(review): message says "flat" but this is a buffer
            // (MUBUF) instruction — likely copied from the flat path.
            fatal("Unsupported scope for flat instruction.\n");
        }
    } // execute

    void
    Inst_MUBUF__BUFFER_WBINVL1::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // TODO: Fix it for gfx10. Once we have the new gfx10 cache model, we
        // need to precisely communicate the writeback-invalidate operation to
        // the new gfx10 coalescer rather than sending AcquireRelease markers.
        // The SICoalescer would need to be updated appropriately as well.
        injectGlobalMemFence(gpuDynInst);
    } // initiateAcc
    void
    Inst_MUBUF__BUFFER_WBINVL1::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_WBINVL1_VOL class methods ---

    Inst_MUBUF__BUFFER_WBINVL1_VOL
        ::Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_wbinvl1_vol") {
        // This instruction is same as buffer_wbinvl1 instruction except this
        // instruction only invalidates L1 shader lines with MTYPE SC and GC.
        // Since Hermes L1 (TCP) does not differentiate between its cache
        // lines, this instruction currently behaves (and is implemented)
        // exactly like the buffer_wbinvl1 instruction.
        setFlag(MemoryRef);
        setFlag(GPUStaticInst::MemSync);
        setFlag(GlobalSegment);
        setFlag(MemSync);
    } // Inst_MUBUF__BUFFER_WBINVL1_VOL

    Inst_MUBUF__BUFFER_WBINVL1_VOL::~Inst_MUBUF__BUFFER_WBINVL1_VOL()
    {
    } // ~Inst_MUBUF__BUFFER_WBINVL1_VOL

    // --- description from .arch file ---
    // Write back and invalidate the shader L1 only for lines that are marked
    // --- volatile.
    // Always returns ACK to shader.
    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        // No active lanes: undo the issue-time bookkeeping and bail out.
        if (gpuDynInst->exec_mask.none()) {
            wf->decVMemInstsIssued();
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
        } else {
            // NOTE(review): message says "flat" but this is a buffer
            // (MUBUF) instruction — likely copied from the flat path.
            fatal("Unsupported scope for flat instruction.\n");
        }
    } // execute
    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Modeled as a global memory fence, same as buffer_wbinvl1.
        injectGlobalMemFence(gpuDynInst);
    } // initiateAcc
    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_MUBUF__BUFFER_ATOMIC_SWAP class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_SWAP
        ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_swap")
    {
        setFlag(AtomicExch);
        // GLC=1 means the atomic returns the pre-op memory value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SWAP

    Inst_MUBUF__BUFFER_ATOMIC_SWAP::~Inst_MUBUF__BUFFER_ATOMIC_SWAP()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
+ void + Inst_MUBUF__BUFFER_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP + ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap") + { + setFlag(AtomicCAS); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP + + Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // src = DATA[0]; + // cmp = DATA[1]; + // MEM[ADDR] = (tmp == cmp) ? src : tmp; + // RETURN_DATA[0] = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR); + ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1); + ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4); + ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET); + ConstVecOperandU32 src(gpuDynInst, extData.VDATA); + ConstVecOperandU32 cmp(gpuDynInst, extData.VDATA + 1); + + rsrcDesc.read(); + offset.read(); + src.read(); + cmp.read(); + + int inst_offset = instData.OFFSET; + + if (!instData.IDXEN && !instData.OFFEN) { + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (!instData.IDXEN && instData.OFFEN) { + addr0.read(); + calcAddr(gpuDynInst, + addr0, addr1, rsrcDesc, offset, inst_offset); + } else if (instData.IDXEN && !instData.OFFEN) { + addr0.read(); + 
calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } else { + addr0.read(); + addr1.read(); + calcAddr(gpuDynInst, + addr1, addr0, rsrcDesc, offset, inst_offset); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + (reinterpret_cast(gpuDynInst->x_data))[lane] + = src[lane]; + (reinterpret_cast(gpuDynInst->a_data))[lane] + = cmp[lane]; + } + } + + gpuDynInst->computeUnit()->globalMemoryPipe.issueRequest(gpuDynInst); + } // execute + + void + Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initAtomicAccess(gpuDynInst); + } // initiateAcc + + void + Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst) + { + if (isAtomicRet()) { + VecOperandU32 vdst(gpuDynInst, extData.VDATA); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + + vdst.write(); + } + } // completeAcc + // --- Inst_MUBUF__BUFFER_ATOMIC_ADD class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_ADD + ::Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_add") + { + setFlag(AtomicAdd); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_ADD + + Inst_MUBUF__BUFFER_ATOMIC_ADD::~Inst_MUBUF__BUFFER_ATOMIC_ADD() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD + + // --- description from .arch file --- + // 32b: + // tmp = MEM[ADDR]; + // MEM[ADDR] += DATA; + // RETURN_DATA = tmp. 
    void
    Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
    {
        // Decode-only stub: this buffer atomic is not supported by the
        // timing model; the same applies to the remaining stubs below.
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_SUB class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_SUB
        ::Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_sub")
    {
        setFlag(AtomicSub);
        // GLC=1 means the atomic returns the pre-op memory value.
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SUB

    Inst_MUBUF__BUFFER_ATOMIC_SUB::~Inst_MUBUF__BUFFER_ATOMIC_SUB()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_SMIN class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_SMIN
        ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_smin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SMIN

    Inst_MUBUF__BUFFER_ATOMIC_SMIN::~Inst_MUBUF__BUFFER_ATOMIC_SMIN()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_UMIN class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_UMIN
        ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_umin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_UMIN

    Inst_MUBUF__BUFFER_ATOMIC_UMIN::~Inst_MUBUF__BUFFER_ATOMIC_UMIN()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_SMAX class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_SMAX
        ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_smax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SMAX

    Inst_MUBUF__BUFFER_ATOMIC_SMAX::~Inst_MUBUF__BUFFER_ATOMIC_SMAX()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_UMAX class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_UMAX
        ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_umax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_UMAX

    Inst_MUBUF__BUFFER_ATOMIC_UMAX::~Inst_MUBUF__BUFFER_ATOMIC_UMAX()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_AND class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_AND
        ::Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_and")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_AND

    Inst_MUBUF__BUFFER_ATOMIC_AND::~Inst_MUBUF__BUFFER_ATOMIC_AND()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_AND

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_OR class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_OR
        ::Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_or")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_OR

    Inst_MUBUF__BUFFER_ATOMIC_OR::~Inst_MUBUF__BUFFER_ATOMIC_OR()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_OR

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_XOR class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_XOR
        ::Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_xor")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_XOR

    Inst_MUBUF__BUFFER_ATOMIC_XOR::~Inst_MUBUF__BUFFER_ATOMIC_XOR()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_INC class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_INC
        ::Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_inc")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_INC

    Inst_MUBUF__BUFFER_ATOMIC_INC::~Inst_MUBUF__BUFFER_ATOMIC_INC()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_INC

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_DEC class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_DEC
        ::Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_dec")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_DEC

    Inst_MUBUF__BUFFER_ATOMIC_DEC::~Inst_MUBUF__BUFFER_ATOMIC_DEC()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC

    // --- description from .arch file ---
    // 32b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2 class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_swap_x2")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2

    Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2 class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap_x2")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
        ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // src = DATA[0:1];
    // cmp = DATA[2:3];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_ADD_X2 class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_add_x2")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2

    Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_MUBUF__BUFFER_ATOMIC_SUB_X2 class methods ---

    Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_sub_x2")
    {
        setFlag(AtomicSub);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2

    Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2

    // --- description from .arch file ---
    // 64b:
    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
+ void + Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_smin_x2") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 + + Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_umin_x2") + { + setFlag(AtomicMin); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 + + Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_smax_x2") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 + + Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_umax_x2") + { + setFlag(AtomicMax); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 + + Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] -= (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare); + // RETURN_DATA[0:1] = tmp. 
+    void
+    Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+    // --- Inst_MUBUF__BUFFER_ATOMIC_AND_X2 class methods ---
+
+    Inst_MUBUF__BUFFER_ATOMIC_AND_X2
+        ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF *iFmt)
+        : Inst_MUBUF(iFmt, "buffer_atomic_and_x2")
+    {
+        setFlag(AtomicAnd);
+        if (instData.GLC) {
+            setFlag(AtomicReturn);
+        } else {
+            setFlag(AtomicNoReturn);
+        }
+        setFlag(MemoryRef);
+        setFlag(GlobalSegment);
+    } // Inst_MUBUF__BUFFER_ATOMIC_AND_X2
+
+    Inst_MUBUF__BUFFER_ATOMIC_AND_X2::~Inst_MUBUF__BUFFER_ATOMIC_AND_X2()
+    {
+    } // ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2
+
+    // --- description from .arch file ---
+    // 64b:
+    // tmp = MEM[ADDR];
+    // MEM[ADDR] &= DATA[0:1];
+    // RETURN_DATA[0:1] = tmp.
+    void
+    Inst_MUBUF__BUFFER_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+    // --- Inst_MUBUF__BUFFER_ATOMIC_OR_X2 class methods ---
+
+    Inst_MUBUF__BUFFER_ATOMIC_OR_X2
+        ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF *iFmt)
+        : Inst_MUBUF(iFmt, "buffer_atomic_or_x2")
+    {
+        setFlag(AtomicOr);
+        if (instData.GLC) {
+            setFlag(AtomicReturn);
+        } else {
+            setFlag(AtomicNoReturn);
+        }
+        // These two flags were missing here but are set by every other
+        // MUBUF atomic in this file; without them the instruction is not
+        // classified as a global-segment memory reference.
+        setFlag(MemoryRef);
+        setFlag(GlobalSegment);
+    } // Inst_MUBUF__BUFFER_ATOMIC_OR_X2
+
+    Inst_MUBUF__BUFFER_ATOMIC_OR_X2::~Inst_MUBUF__BUFFER_ATOMIC_OR_X2()
+    {
+    } // ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2
+
+    // --- description from .arch file ---
+    // 64b:
+    // tmp = MEM[ADDR];
+    // MEM[ADDR] |= DATA[0:1];
+    // RETURN_DATA[0:1] = tmp.
+ void + Inst_MUBUF__BUFFER_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_xor_x2") + { + setFlag(AtomicXor); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 + + Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] ^= DATA[0:1]; + // RETURN_DATA[0:1] = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_INC_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_INC_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_inc_x2") + { + setFlag(AtomicInc); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_INC_X2 + + Inst_MUBUF__BUFFER_ATOMIC_INC_X2::~Inst_MUBUF__BUFFER_ATOMIC_INC_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare); + // RETURN_DATA[0:1] = tmp. 
+ void + Inst_MUBUF__BUFFER_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 class methods --- + + Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 + ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF *iFmt) + : Inst_MUBUF(iFmt, "buffer_atomic_dec_x2") + { + setFlag(AtomicDec); + if (instData.GLC) { + setFlag(AtomicReturn); + } else { + setFlag(AtomicNoReturn); + } + setFlag(MemoryRef); + setFlag(GlobalSegment); + } // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 + + Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2() + { + } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2 + + // --- description from .arch file --- + // 64b: + // tmp = MEM[ADDR]; + // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1 + // (unsigned compare); + // RETURN_DATA[0:1] = tmp. + void + Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/op_encodings.cc b/src/arch/amdgpu/vega/insts/op_encodings.cc index c934094d9b..0b4e894e75 100644 --- a/src/arch/amdgpu/vega/insts/op_encodings.cc +++ b/src/arch/amdgpu/vega/insts/op_encodings.cc @@ -1178,6 +1178,158 @@ namespace VegaISA disassembly = dis_stream.str(); } + // --- Inst_VOP3P base class methods --- + + Inst_VOP3P::Inst_VOP3P(InFmt_VOP3P *iFmt, const std::string &opcode) + : VEGAGPUStaticInst(opcode) + { + // copy first instruction DWORD + instData = iFmt[0]; + // copy second instruction DWORD + extData = ((InFmt_VOP3P_1 *)iFmt)[1]; + } // Inst_VOP3P + + Inst_VOP3P::~Inst_VOP3P() + { + } // ~Inst_VOP3P + + void + Inst_VOP3P::initOperandInfo() + { + // Also takes care of bitfield addr issue + unsigned int srcs[3] = {extData.SRC0, extData.SRC1, extData.SRC2}; + + int opNum = 0; + + int numSrc = numSrcRegOperands(); + + for (opNum = 0; opNum < numSrc; opNum++) { + srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true, + 
isScalarReg(srcs[opNum]), + isVectorReg(srcs[opNum]), false); + } + + // There is always one dest + // Needed because can't take addr of bitfield + int reg = instData.VDST; + dstOps.emplace_back(reg, getOperandSize(opNum), false, + false, true, false); + opNum++; + + assert(srcOps.size() == numSrcRegOperands()); + assert(dstOps.size() == numDstRegOperands()); + } + + int + Inst_VOP3P::instSize() const + { + return 8; + } // instSize + + void + Inst_VOP3P::generateDisassembly() + { + std::stringstream dis_stream; + dis_stream << _opcode << " "; + + // There is always a dest and the index is after the src operands + // The output size much be a multiple of dword size + int dst_size = getOperandSize(numSrcRegOperands()); + + dis_stream << opSelectorToRegSym(instData.VDST + 0x100, dst_size / 4); + + unsigned int srcs[3] = {extData.SRC0, extData.SRC1, extData.SRC2}; + for (int opnum = 0; opnum < numSrcRegOperands(); opnum++) { + int num_regs = getOperandSize(opnum) / 4; + dis_stream << ", " << opSelectorToRegSym(srcs[opnum], num_regs); + } + + // Print op_sel only if one is non-zero + if (instData.OPSEL) { + int opsel = instData.OPSEL; + + dis_stream << " op_sel:[" << bits(opsel, 0, 0) << "," + << bits(opsel, 1, 1) << "," << bits(opsel, 2, 2) << "]"; + } + + disassembly = dis_stream.str(); + } + + // --- Inst_VOP3P_MAI base class methods --- + + Inst_VOP3P_MAI::Inst_VOP3P_MAI(InFmt_VOP3P_MAI *iFmt, + const std::string &opcode) + : VEGAGPUStaticInst(opcode) + { + // copy first instruction DWORD + instData = iFmt[0]; + // copy second instruction DWORD + extData = ((InFmt_VOP3P_MAI_1 *)iFmt)[1]; + } // Inst_VOP3P_MAI + + Inst_VOP3P_MAI::~Inst_VOP3P_MAI() + { + } // ~Inst_VOP3P_MAI + + void + Inst_VOP3P_MAI::initOperandInfo() + { + // Also takes care of bitfield addr issue + unsigned int srcs[3] = {extData.SRC0, extData.SRC1, extData.SRC2}; + + int opNum = 0; + + int numSrc = numSrcRegOperands(); + + for (opNum = 0; opNum < numSrc; opNum++) { + 
srcOps.emplace_back(srcs[opNum], getOperandSize(opNum), true,
+                                isScalarReg(srcs[opNum]),
+                                isVectorReg(srcs[opNum]), false);
+        }
+
+        // There is always one dest
+        // Needed because can't take addr of bitfield
+        int reg = instData.VDST;
+        dstOps.emplace_back(reg, getOperandSize(opNum), false,
+                            false, true, false);
+        opNum++;
+
+        assert(srcOps.size() == numSrcRegOperands());
+        assert(dstOps.size() == numDstRegOperands());
+    }
+
+    int
+    Inst_VOP3P_MAI::instSize() const
+    {
+        return 8;
+    } // instSize
+
+    void
+    Inst_VOP3P_MAI::generateDisassembly()
+    {
+        std::stringstream dis_stream;
+        dis_stream << _opcode << " ";
+
+        // There is always a dest and the index is after the src operands
+        // The output size must be a multiple of dword size
+        int dst_size = getOperandSize(numSrcRegOperands());
+
+        // opSelectorToRegSym handles formatting for us. VDST is always VGPR
+        // so only the last 8 bits are used. This adds the implicit 9th bit
+        // which is 1 for VGPRs as VGPR op nums are from 256-511.
+ int dst_opnum = instData.VDST + 0x100; + + dis_stream << opSelectorToRegSym(dst_opnum, dst_size / 4); + + unsigned int srcs[3] = {extData.SRC0, extData.SRC1, extData.SRC2}; + for (int opnum = 0; opnum < numSrcRegOperands(); opnum++) { + int num_regs = getOperandSize(opnum) / 4; + dis_stream << ", " << opSelectorToRegSym(srcs[opnum], num_regs); + } + + disassembly = dis_stream.str(); + } + // --- Inst_DS base class methods --- Inst_DS::Inst_DS(InFmt_DS *iFmt, const std::string &opcode) @@ -1695,10 +1847,10 @@ namespace VegaISA // One of the flat subtypes should be specified via flags assert(isFlat() ^ isFlatGlobal() ^ isFlatScratch()); - if (isFlat()) { - generateFlatDisassembly(); - } else if (isFlatGlobal() || isFlatScratch()) { + if (isFlatGlobal() || isFlatScratch()) { generateGlobalScratchDisassembly(); + } else if (isFlat()) { + generateFlatDisassembly(); } else { panic("Unknown flat subtype!\n"); } @@ -1710,13 +1862,19 @@ namespace VegaISA std::stringstream dis_stream; dis_stream << _opcode << " "; - if (isLoad()) - dis_stream << "v" << extData.VDST << ", "; + if (isLoad() || isAtomic()) { + int dst_size = getOperandSize(numSrcRegOperands()) / 4; + dis_stream << opSelectorToRegSym(extData.VDST + 0x100, dst_size) + << ", "; + } - dis_stream << "v[" << extData.ADDR << ":" << extData.ADDR + 1 << "]"; + dis_stream << opSelectorToRegSym(extData.ADDR + 0x100, 2); - if (isStore()) - dis_stream << ", v" << extData.DATA; + if (isStore() || isAtomic()) { + int src_size = getOperandSize(1) / 4; + dis_stream << ", " + << opSelectorToRegSym(extData.DATA + 0x100, src_size); + } disassembly = dis_stream.str(); } @@ -1736,25 +1894,38 @@ namespace VegaISA std::stringstream dis_stream; dis_stream << global_opcode << " "; - if (isLoad()) - dis_stream << "v" << extData.VDST << ", "; + if (isLoad() || isAtomic()) { + // dest is the first operand after all the src operands + int dst_size = getOperandSize(numSrcRegOperands()) / 4; + dis_stream << opSelectorToRegSym(extData.VDST + 
0x100, dst_size) + << ", "; + } - if (extData.SADDR == 0x7f) - dis_stream << "v[" << extData.ADDR << ":" << extData.ADDR+1 << "]"; - else - dis_stream << "v" << extData.ADDR; + if (extData.SADDR == 0x7f) { + dis_stream << opSelectorToRegSym(extData.ADDR + 0x100, 2); + } else { + dis_stream << opSelectorToRegSym(extData.ADDR + 0x100, 1); + } - if (isStore()) - dis_stream << ", v" << extData.DATA; + if (isStore() || isAtomic()) { + int src_size = getOperandSize(1) / 4; + dis_stream << ", " + << opSelectorToRegSym(extData.DATA + 0x100, src_size); + } - if (extData.SADDR == 0x7f) + if (extData.SADDR == 0x7f) { dis_stream << ", off"; - else - dis_stream << ", s[" << extData.SADDR << ":" << extData.SADDR+1 - << "]"; + } else { + dis_stream << ", " << opSelectorToRegSym(extData.SADDR, 2); + } - if (instData.OFFSET) + if (instData.OFFSET) { dis_stream << " offset:" << instData.OFFSET; + } + + if (instData.GLC) { + dis_stream << " glc"; + } disassembly = dis_stream.str(); } diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh index a1c5e99c91..504946534f 100644 --- a/src/arch/amdgpu/vega/insts/op_encodings.hh +++ b/src/arch/amdgpu/vega/insts/op_encodings.hh @@ -455,6 +455,29 @@ namespace VegaISA // second instruction DWORD InFmt_VOP3_1 extData; + // Output modifier for VOP3 instructions. This 2-bit field can be set + // to "0" to do nothing, "1" to multiply output value by 2, "2" to + // multiply output value by 4, or "3" to divide output value by 2. If + // the instruction supports clamping, this is applied *before* clamp + // but after the abs and neg modifiers. 
+        template<typename T>
+        T omodModifier(T val, unsigned omod)
+        {
+            assert(omod < 4);
+
+            if constexpr (std::is_floating_point_v<T>) {
+                if (omod == 1) return val * T(2.0f);
+                if (omod == 2) return val * T(4.0f);
+                if (omod == 3) return val / T(2.0f);
+            } else {
+                assert(std::is_integral_v<T>);
+                if (omod == 1) return val * T(2);
+                if (omod == 2) return val * T(4);
+                if (omod == 3) return val / T(2);
+            }
+
+            return val;
+        }
       private:
         bool hasSecondDword(InFmt_VOP3A *);
         /**
@@ -491,6 +514,199 @@ namespace VegaISA
         bool hasSecondDword(InFmt_VOP3B *);
     }; // Inst_VOP3B
 
+    class Inst_VOP3P : public VEGAGPUStaticInst
+    {
+      public:
+        Inst_VOP3P(InFmt_VOP3P*, const std::string &opcode);
+        ~Inst_VOP3P();
+
+        int instSize() const override;
+        void generateDisassembly() override;
+
+        void initOperandInfo() override;
+
+      protected:
+        // first instruction DWORD
+        InFmt_VOP3P instData;
+        // second instruction DWORD
+        InFmt_VOP3P_1 extData;
+
+        // Helper for two-source packed ops: fOpImpl computes one 16-bit
+        // result and is applied independently to the high and low words
+        // of each packed dword. T must be a 16-bit type (see word<T>).
+        template<typename T>
+        void vop3pHelper(GPUDynInstPtr gpuDynInst,
+                         T (*fOpImpl)(T, T, bool))
+        {
+            Wavefront *wf = gpuDynInst->wavefront();
+            ConstVecOperandU32 S0(gpuDynInst, extData.SRC0);
+            ConstVecOperandU32 S1(gpuDynInst, extData.SRC1);
+            VecOperandU32 D(gpuDynInst, instData.VDST);
+
+            S0.readSrc();
+            S1.readSrc();
+
+            int opLo = instData.OPSEL;
+            int opHi = instData.OPSEL_HI2 << 2 | extData.OPSEL_HI;
+            int negLo = extData.NEG;
+            int negHi = instData.NEG_HI;
+            bool clamp = instData.CLMP;
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (wf->execMask(lane)) {
+                    T upper_val = fOpImpl(word<T>(S0[lane], opHi, negHi, 0),
+                                          word<T>(S1[lane], opHi, negHi, 1),
+                                          clamp);
+                    T lower_val = fOpImpl(word<T>(S0[lane], opLo, negLo, 0),
+                                          word<T>(S1[lane], opLo, negLo, 1),
+                                          clamp);
+
+                    uint16_t upper_raw =
+                        *reinterpret_cast<uint16_t*>(&upper_val);
+                    uint16_t lower_raw =
+                        *reinterpret_cast<uint16_t*>(&lower_val);
+
+                    D[lane] = upper_raw << 16 | lower_raw;
+                }
+            }
+
+            D.write();
+        }
+
+        // Three-source overload of the packed-op helper (e.g. pk_fma).
+        template<typename T>
+        void vop3pHelper(GPUDynInstPtr gpuDynInst,
+                         T (*fOpImpl)(T, T, T, bool))
+        {
+            Wavefront *wf =
gpuDynInst->wavefront(); + ConstVecOperandU32 S0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 S1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 S2(gpuDynInst, extData.SRC2); + VecOperandU32 D(gpuDynInst, instData.VDST); + + S0.readSrc(); + S1.readSrc(); + S2.readSrc(); + + int opLo = instData.OPSEL; + int opHi = instData.OPSEL_HI2 << 2 | extData.OPSEL_HI; + int negLo = extData.NEG; + int negHi = instData.NEG_HI; + bool clamp = instData.CLMP; + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + T upper_val = fOpImpl(word(S0[lane], opHi, negHi, 0), + word(S1[lane], opHi, negHi, 1), + word(S2[lane], opHi, negHi, 2), + clamp); + T lower_val = fOpImpl(word(S0[lane], opLo, negLo, 0), + word(S1[lane], opLo, negLo, 1), + word(S2[lane], opLo, negLo, 2), + clamp); + + uint16_t upper_raw = + *reinterpret_cast(&upper_val); + uint16_t lower_raw = + *reinterpret_cast(&lower_val); + + D[lane] = upper_raw << 16 | lower_raw; + } + } + + D.write(); + } + + void + dotHelper(GPUDynInstPtr gpuDynInst, + uint32_t (*fOpImpl)(uint32_t, uint32_t, uint32_t, bool)) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 S0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 S1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 S2(gpuDynInst, extData.SRC2); + VecOperandU32 D(gpuDynInst, instData.VDST); + + S0.readSrc(); + S1.readSrc(); + S2.readSrc(); + + // OPSEL[2] and OPSEL_HI2 are unused. 
Craft two dwords where: + // dword1[15:0] is upper/lower 16b of src0 based on opsel[0] + // dword1[31:15] is upper/lower 16b of src0 based on opsel_hi[0] + // dword2[15:0] is upper/lower 16b of src1 based on opsel[1] + // dword2[31:15] is upper/lower 16b of src1 based on opsel_hi[1] + int opLo = instData.OPSEL; + int opHi = extData.OPSEL_HI; + int negLo = extData.NEG; + int negHi = instData.NEG_HI; + bool clamp = instData.CLMP; + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + uint32_t dword1l = + word(S0[lane], opLo, negLo, 0); + uint32_t dword1h = + word(S0[lane], opHi, negHi, 0); + uint32_t dword2l = + word(S1[lane], opLo, negLo, 1); + uint32_t dword2h = + word(S1[lane], opHi, negHi, 1); + + uint32_t dword1 = (dword1h << 16) | dword1l; + uint32_t dword2 = (dword2h << 16) | dword2l; + + // Take in two uint32_t dwords and one src2 dword. The + // function will need to call bits to break up to the + // correct size and then reinterpret cast to the correct + // value. + D[lane] = fOpImpl(dword1, dword2, S2[lane], clamp); + } + } + + D.write(); + } + + private: + bool hasSecondDword(InFmt_VOP3P *); + + template + T + word(uint32_t data, int opSel, int neg, int opSelBit) + { + // This method assumes two words packed into a dword + static_assert(sizeof(T) == 2); + + bool select = bits(opSel, opSelBit, opSelBit); + uint16_t raw = select ? bits(data, 31, 16) + : bits(data, 15, 0); + + // Apply input modifiers. This may seem odd, but the hardware + // just flips the MSb instead of doing unary negation. 
+ bool negate = bits(neg, opSelBit, opSelBit); + if (negate) { + raw ^= 0x8000; + } + + return *reinterpret_cast(&raw); + } + }; // Inst_VOP3P + + class Inst_VOP3P_MAI : public VEGAGPUStaticInst + { + public: + Inst_VOP3P_MAI(InFmt_VOP3P_MAI*, const std::string &opcode); + ~Inst_VOP3P_MAI(); + + int instSize() const override; + void generateDisassembly() override; + + void initOperandInfo() override; + + protected: + // first instruction DWORD + InFmt_VOP3P_MAI instData; + // second instruction DWORD + InFmt_VOP3P_MAI_1 extData; + + private: + bool hasSecondDword(InFmt_VOP3P_MAI *); + }; // Inst_VOP3P + class Inst_DS : public VEGAGPUStaticInst { public: @@ -1065,13 +1281,12 @@ namespace VegaISA // If saddr = 0x7f there is no scalar reg to read and address will // be a 64-bit address. Otherwise, saddr is the reg index for a // scalar reg used as the base address for a 32-bit address. - if ((saddr == 0x7f && (isFlatGlobal() || isFlatScratch())) - || isFlat()) { + if ((saddr == 0x7f && isFlatGlobal()) || isFlat()) { ConstVecOperandU64 vbase(gpuDynInst, vaddr); vbase.read(); calcAddrVgpr(gpuDynInst, vbase, offset); - } else { + } else if (isFlatGlobal()) { // Assume we are operating in 64-bit mode and read a pair of // SGPRs for the address base. ConstScalarOperandU64 sbase(gpuDynInst, saddr); @@ -1081,6 +1296,68 @@ namespace VegaISA voffset.read(); calcAddrSgpr(gpuDynInst, voffset, sbase, offset); + // For scratch, saddr = 0x7f there is no scalar reg to read and + // a vgpr will be used for address offset. Otherwise, saddr is + // the sgpr index holding the address offset. For scratch + // instructions the offset GPR is always 32-bits. 
+ } else if (saddr != 0x7f) { + assert(isFlatScratch()); + + ConstScalarOperandU32 soffset(gpuDynInst, saddr); + soffset.read(); + + ConstVecOperandU32 voffset(gpuDynInst, vaddr); + if (instData.SVE) { + voffset.read(); + } + + Addr flat_scratch_addr = readFlatScratch(gpuDynInst); + + int elemSize; + auto staticInst = gpuDynInst->staticInstruction(); + if (gpuDynInst->isLoad()) { + elemSize = staticInst->getOperandSize(2); + } else { + assert(gpuDynInst->isStore()); + elemSize = staticInst->getOperandSize(1); + } + + unsigned swizzleOffset = soffset.rawData() + offset; + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + swizzleOffset += instData.SVE ? voffset[lane] : 0; + gpuDynInst->addr.at(lane) = flat_scratch_addr + + swizzle(swizzleOffset, lane, elemSize); + } + } + } else { + assert(isFlatScratch()); + + ConstVecOperandU32 voffset(gpuDynInst, vaddr); + if (instData.SVE) { + voffset.read(); + } + + Addr flat_scratch_addr = readFlatScratch(gpuDynInst); + + int elemSize; + auto staticInst = gpuDynInst->staticInstruction(); + if (gpuDynInst->isLoad()) { + elemSize = staticInst->getOperandSize(2); + } else { + assert(gpuDynInst->isStore()); + elemSize = staticInst->getOperandSize(1); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + VecElemU32 vgpr_offset = + instData.SVE ? voffset[lane] : 0; + + gpuDynInst->addr.at(lane) = flat_scratch_addr + + swizzle(vgpr_offset + offset, lane, elemSize); + } + } } if (isFlat()) { @@ -1092,6 +1369,7 @@ namespace VegaISA assert(isFlatScratch()); gpuDynInst->staticInstruction()->executed_as = enums::SC_PRIVATE; + gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask); } } @@ -1113,6 +1391,77 @@ namespace VegaISA } } + // Execute for atomics is identical besides the flag set in the + // constructor, except cmpswap. For cmpswap, the offset to the "cmp" + // register is needed. 
For all other operations this offset is zero + // and implies the atomic is not a cmpswap. + // RegT defines the type of GPU register (e.g., ConstVecOperandU32) + // LaneT defines the type of the register elements (e.g., VecElemU32) + template + void + atomicExecute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + + if (gpuDynInst->exec_mask.none()) { + wf->decVMemInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } + return; + } + + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + + RegT data(gpuDynInst, extData.DATA); + RegT cmp(gpuDynInst, extData.DATA + CmpRegOffset); + + data.read(); + if constexpr (CmpRegOffset) { + cmp.read(); + } + + calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + if constexpr (CmpRegOffset) { + (reinterpret_cast( + gpuDynInst->x_data))[lane] = data[lane]; + (reinterpret_cast( + gpuDynInst->a_data))[lane] = cmp[lane]; + } else { + (reinterpret_cast(gpuDynInst->a_data))[lane] + = data[lane]; + } + } + } + + issueRequestHelper(gpuDynInst); + } + + // RegT defines the type of GPU register (e.g., ConstVecOperandU32) + // LaneT defines the type of the register elements (e.g., VecElemU32) + template + void + atomicComplete(GPUDynInstPtr gpuDynInst) + { + if (isAtomicRet()) { + RegT vdst(gpuDynInst, extData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (gpuDynInst->exec_mask[lane]) { + vdst[lane] = (reinterpret_cast( + gpuDynInst->d_data))[lane]; + } + } + + vdst.write(); + } + } + bool vgprIsOffset() { @@ -1157,6 +1506,23 @@ namespace VegaISA } } } + + VecElemU32 + swizzle(VecElemU32 offset, int lane, int elem_size) + { + // This is not described in the spec. We use the swizzle from + // buffer memory instructions and fix the stride to 4. 
Multiply + // the thread ID by the storage size to avoid threads clobbering + // their data. + return ((offset / 4) * 4 * 64) + + (offset % 4) + (lane * elem_size); + } + + Addr + readFlatScratch(GPUDynInstPtr gpuDynInst) + { + return gpuDynInst->computeUnit()->shader->getScratchBase(); + } }; // Inst_FLAT } // namespace VegaISA } // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/smem.cc b/src/arch/amdgpu/vega/insts/smem.cc new file mode 100644 index 0000000000..a6af4f007d --- /dev/null +++ b/src/arch/amdgpu/vega/insts/smem.cc @@ -0,0 +1,1013 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_SMEM__S_LOAD_DWORD class methods --- + + Inst_SMEM__S_LOAD_DWORD::Inst_SMEM__S_LOAD_DWORD(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_load_dword") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_SMEM__S_LOAD_DWORD + + Inst_SMEM__S_LOAD_DWORD::~Inst_SMEM__S_LOAD_DWORD() + { + } // ~Inst_SMEM__S_LOAD_DWORD + + /** + * Read 1 dword from scalar data cache. If the offset is specified as an + * sgpr, the sgpr contains an unsigned byte offset (the 2 LSBs are + * ignored). If the offset is specified as an immediate 20-bit constant, + * the constant is an unsigned byte offset. 
+ */ + void + Inst_SMEM__S_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + ScalarRegU32 offset(0); + ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); + + addr.read(); + + if (instData.IMM) { + offset = extData.OFFSET; + } else { + ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); + off_sgpr.read(); + offset = off_sgpr.rawData(); + } + + calcAddr(gpuDynInst, addr, offset); + + gpuDynInst->computeUnit()->scalarMemoryPipe + .issueRequest(gpuDynInst); + } // execute + + void + Inst_SMEM__S_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemRead<1>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst) + { + ScalarOperandU32 sdst(gpuDynInst, instData.SDATA); + sdst.write(); + } // completeAcc + // --- Inst_SMEM__S_LOAD_DWORDX2 class methods --- + + Inst_SMEM__S_LOAD_DWORDX2::Inst_SMEM__S_LOAD_DWORDX2(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_load_dwordx2") + { + setFlag(MemoryRef); + setFlag(Load); + } // Inst_SMEM__S_LOAD_DWORDX2 + + Inst_SMEM__S_LOAD_DWORDX2::~Inst_SMEM__S_LOAD_DWORDX2() + { + } // ~Inst_SMEM__S_LOAD_DWORDX2 + + /** + * Read 2 dwords from scalar data cache. See s_load_dword for details on + * the offset input. 
     */
    void
    Inst_SMEM__S_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        // Same addressing scheme as s_load_dword; only the access width
        // (2 dwords) differs.
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Issue a 2-dword scalar memory read.
        initMemRead<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 2 dwords commit through a 64-bit destination operand.
        ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_LOAD_DWORDX4 class methods ---

    Inst_SMEM__S_LOAD_DWORDX4::Inst_SMEM__S_LOAD_DWORDX4(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_LOAD_DWORDX4

    Inst_SMEM__S_LOAD_DWORDX4::~Inst_SMEM__S_LOAD_DWORDX4()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX4

    // --- description from .arch file ---
    // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        // Same addressing scheme as s_load_dword; access width is 4 dwords.
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Issue a 4-dword scalar memory read.
        initMemRead<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 4 dwords commit through a 128-bit destination operand.
        ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_LOAD_DWORDX8 class methods ---

    Inst_SMEM__S_LOAD_DWORDX8::Inst_SMEM__S_LOAD_DWORDX8(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx8")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_LOAD_DWORDX8

    Inst_SMEM__S_LOAD_DWORDX8::~Inst_SMEM__S_LOAD_DWORDX8()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX8

    // --- description from .arch file ---
    // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
    {
        // Same addressing scheme as s_load_dword; access width is 8 dwords.
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Issue an 8-dword scalar memory read.
        initMemRead<8>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 8 dwords commit through a 256-bit destination operand.
        ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_LOAD_DWORDX16 class methods ---

    Inst_SMEM__S_LOAD_DWORDX16::Inst_SMEM__S_LOAD_DWORDX16(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx16")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_LOAD_DWORDX16

    Inst_SMEM__S_LOAD_DWORDX16::~Inst_SMEM__S_LOAD_DWORDX16()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX16

    // --- description from .arch file ---
    // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
    {
        // Same addressing scheme as s_load_dword; access width is 16 dwords.
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Issue a 16-dword scalar memory read.
        initMemRead<16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 16 dwords commit through a 512-bit destination operand.
        ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_BUFFER_LOAD_DWORD class methods ---

    Inst_SMEM__S_BUFFER_LOAD_DWORD::Inst_SMEM__S_BUFFER_LOAD_DWORD(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dword")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORD

    Inst_SMEM__S_BUFFER_LOAD_DWORD::~Inst_SMEM__S_BUFFER_LOAD_DWORD()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORD

    // --- description from .arch file ---
    // Read 1 dword from scalar data cache. See S_LOAD_DWORD for details on the
    // --- offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        // Buffer variant: SBASE names a 128-bit operand (presumably a buffer
        // resource descriptor) and, unlike the flat scalar loads, is NOT
        // shifted left by one here.
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        // Byte offset: 20-bit immediate (IMM set) or the SGPR named by OFFSET.
        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Issue a 1-dword scalar memory read.
        initMemRead<1>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 1 request, size 32
        ScalarOperandU32 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX2 class methods ---

    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::Inst_SMEM__S_BUFFER_LOAD_DWORDX2(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX2

    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::~Inst_SMEM__S_BUFFER_LOAD_DWORDX2()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX2

    // --- description from .arch file ---
    // Read 2 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        // Same addressing scheme as s_buffer_load_dword; width is 2 dwords.
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Issue a 2-dword scalar memory read.
        initMemRead<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // use U64 because 2 requests, each size 32
        ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX4 class methods ---

    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::Inst_SMEM__S_BUFFER_LOAD_DWORDX4(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX4

    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::~Inst_SMEM__S_BUFFER_LOAD_DWORDX4()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX4

    // --- description from .arch file ---
    // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        // Same addressing scheme as s_buffer_load_dword; width is 4 dwords.
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Issue a 4-dword scalar memory read.
        initMemRead<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 4 requests, each size 32
        ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX8 class methods ---

    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::Inst_SMEM__S_BUFFER_LOAD_DWORDX8(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx8")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX8

    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::~Inst_SMEM__S_BUFFER_LOAD_DWORDX8()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX8

    // --- description from .arch file ---
    // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
    {
        // Same addressing scheme as s_buffer_load_dword; width is 8 dwords.
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Issue an 8-dword scalar memory read.
        initMemRead<8>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 8 requests, each size 32
        ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_BUFFER_LOAD_DWORDX16 class methods ---

    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::Inst_SMEM__S_BUFFER_LOAD_DWORDX16(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx16")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX16

    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::~Inst_SMEM__S_BUFFER_LOAD_DWORDX16()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX16

    // --- description from .arch file ---
    // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
    {
        // Same addressing scheme as s_buffer_load_dword; width is 16 dwords.
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Issue a 16-dword scalar memory read.
        initMemRead<16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 16 requests, each size 32
        ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    // --- Inst_SMEM__S_STORE_DWORD class methods ---

    Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_store_dword")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_STORE_DWORD

    Inst_SMEM__S_STORE_DWORD::~Inst_SMEM__S_STORE_DWORD()
    {
    } // ~Inst_SMEM__S_STORE_DWORD

    // --- description from .arch file ---
    // Write 1 dword to scalar data cache.
    // If the offset is specified as an SGPR, the SGPR contains an unsigned
    // BYTE offset (the 2 LSBs are ignored).
    // If the offset is specified as an immediate 20-bit constant, the
    // constant is an unsigned BYTE offset.
    void
    Inst_SMEM__S_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
        // SDATA is the source of the store (1 dword -> 32-bit operand).
        ConstScalarOperandU32 sdata(gpuDynInst, instData.SDATA);

        addr.read();
        sdata.read();

        // Stage the store data in the instruction's scalar_data buffer; it
        // is consumed later by initMemWrite in initiateAcc().
        std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
                    sizeof(ScalarRegU32));

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Issue a 1-dword scalar memory write.
        initMemWrite<1>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // Stores have no destination register to commit.
    } // completeAcc
    // --- Inst_SMEM__S_STORE_DWORDX2 class methods ---

    Inst_SMEM__S_STORE_DWORDX2::Inst_SMEM__S_STORE_DWORDX2(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_STORE_DWORDX2

    Inst_SMEM__S_STORE_DWORDX2::~Inst_SMEM__S_STORE_DWORDX2()
    {
    } // ~Inst_SMEM__S_STORE_DWORDX2

    // --- description from .arch file ---
    // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        // Same addressing scheme as s_store_dword; 2 dwords of store data
        // are read through a 64-bit operand.
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
        ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA);

        addr.read();
        sdata.read();

        // Stage all 64 bits of store data for initiateAcc().
        std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
                    sizeof(ScalarRegU64));

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            issueRequest(gpuDynInst);
    } // execute

    void
    Inst_SMEM__S_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        // Issue a 2-dword scalar memory write.
        initMemWrite<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // Stores have no destination register to commit.
    } // completeAcc
    // --- Inst_SMEM__S_STORE_DWORDX4 class methods ---

    Inst_SMEM__S_STORE_DWORDX4::Inst_SMEM__S_STORE_DWORDX4(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_STORE_DWORDX4

    Inst_SMEM__S_STORE_DWORDX4::~Inst_SMEM__S_STORE_DWORDX4()
    {
    } // ~Inst_SMEM__S_STORE_DWORDX4

    // --- description from .arch file ---
    // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
+ void + Inst_SMEM__S_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + gpuDynInst->execUnitId = wf->execUnitId; + gpuDynInst->latency.init(gpuDynInst->computeUnit()); + gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod()); + ScalarRegU32 offset(0); + ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1); + ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA); + + addr.read(); + sdata.read(); + + std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(), + sizeof(gpuDynInst->scalar_data)); + + if (instData.IMM) { + offset = extData.OFFSET; + } else { + ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET); + off_sgpr.read(); + offset = off_sgpr.rawData(); + } + + calcAddr(gpuDynInst, addr, offset); + + gpuDynInst->computeUnit()->scalarMemoryPipe. + issueRequest(gpuDynInst); + } // execute + + void + Inst_SMEM__S_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst) + { + initMemWrite<4>(gpuDynInst); + } // initiateAcc + + void + Inst_SMEM__S_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst) + { + } // completeAcc + // --- Inst_SMEM__S_BUFFER_STORE_DWORD class methods --- + + Inst_SMEM__S_BUFFER_STORE_DWORD::Inst_SMEM__S_BUFFER_STORE_DWORD( + InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_buffer_store_dword") + { + setFlag(MemoryRef); + setFlag(Store); + } // Inst_SMEM__S_BUFFER_STORE_DWORD + + Inst_SMEM__S_BUFFER_STORE_DWORD::~Inst_SMEM__S_BUFFER_STORE_DWORD() + { + } // ~Inst_SMEM__S_BUFFER_STORE_DWORD + + // --- description from .arch file --- + // Write 1 dword to scalar data cache. See S_STORE_DWORD for details on the + // --- offset input. 
    void
    Inst_SMEM__S_BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        // Not implemented in the timing model; executing this opcode is a
        // fatal error.
        panicUnimplemented();
    } // execute

    void
    Inst_SMEM__S_BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_SMEM__S_BUFFER_STORE_DWORDX2 class methods ---

    Inst_SMEM__S_BUFFER_STORE_DWORDX2::Inst_SMEM__S_BUFFER_STORE_DWORDX2(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_BUFFER_STORE_DWORDX2

    Inst_SMEM__S_BUFFER_STORE_DWORDX2::~Inst_SMEM__S_BUFFER_STORE_DWORDX2()
    {
    } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX2

    // --- description from .arch file ---
    // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        // Not implemented in the timing model.
        panicUnimplemented();
    } // execute

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_SMEM__S_BUFFER_STORE_DWORDX4 class methods ---

    Inst_SMEM__S_BUFFER_STORE_DWORDX4::Inst_SMEM__S_BUFFER_STORE_DWORDX4(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_BUFFER_STORE_DWORDX4

    Inst_SMEM__S_BUFFER_STORE_DWORDX4::~Inst_SMEM__S_BUFFER_STORE_DWORDX4()
    {
    } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX4

    // --- description from .arch file ---
    // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        // Not implemented in the timing model.
        panicUnimplemented();
    } // execute

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_SMEM__S_DCACHE_INV class methods ---

    Inst_SMEM__S_DCACHE_INV::Inst_SMEM__S_DCACHE_INV(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_inv")
    {
    } // Inst_SMEM__S_DCACHE_INV

    Inst_SMEM__S_DCACHE_INV::~Inst_SMEM__S_DCACHE_INV()
    {
    } // ~Inst_SMEM__S_DCACHE_INV

    // --- description from .arch file ---
    // Invalidate the scalar data cache.
    void
    Inst_SMEM__S_DCACHE_INV::execute(GPUDynInstPtr gpuDynInst)
    {
        // Not implemented in the timing model.
        panicUnimplemented();
    } // execute
    // --- Inst_SMEM__S_DCACHE_WB class methods ---

    Inst_SMEM__S_DCACHE_WB::Inst_SMEM__S_DCACHE_WB(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_wb")
    {
    } // Inst_SMEM__S_DCACHE_WB

    Inst_SMEM__S_DCACHE_WB::~Inst_SMEM__S_DCACHE_WB()
    {
    } // ~Inst_SMEM__S_DCACHE_WB

    // --- description from .arch file ---
    // Write back dirty data in the scalar data cache.
    void
    Inst_SMEM__S_DCACHE_WB::execute(GPUDynInstPtr gpuDynInst)
    {
        // Not implemented in the timing model.
        panicUnimplemented();
    } // execute
    // --- Inst_SMEM__S_DCACHE_INV_VOL class methods ---

    Inst_SMEM__S_DCACHE_INV_VOL::Inst_SMEM__S_DCACHE_INV_VOL(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_inv_vol")
    {
    } // Inst_SMEM__S_DCACHE_INV_VOL

    Inst_SMEM__S_DCACHE_INV_VOL::~Inst_SMEM__S_DCACHE_INV_VOL()
    {
    } // ~Inst_SMEM__S_DCACHE_INV_VOL

    // --- description from .arch file ---
    // Invalidate the scalar data cache volatile lines.
    void
    Inst_SMEM__S_DCACHE_INV_VOL::execute(GPUDynInstPtr gpuDynInst)
    {
        // Not implemented in the timing model.
        panicUnimplemented();
    } // execute
    // --- Inst_SMEM__S_DCACHE_WB_VOL class methods ---

    Inst_SMEM__S_DCACHE_WB_VOL::Inst_SMEM__S_DCACHE_WB_VOL(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_wb_vol")
    {
    } // Inst_SMEM__S_DCACHE_WB_VOL

    Inst_SMEM__S_DCACHE_WB_VOL::~Inst_SMEM__S_DCACHE_WB_VOL()
    {
    } // ~Inst_SMEM__S_DCACHE_WB_VOL

    // --- description from .arch file ---
    // Write back dirty data in the scalar data cache volatile lines.
    void
    Inst_SMEM__S_DCACHE_WB_VOL::execute(GPUDynInstPtr gpuDynInst)
    {
        // Not implemented in the timing model.
        panicUnimplemented();
    } // execute
    // --- Inst_SMEM__S_MEMTIME class methods ---

    Inst_SMEM__S_MEMTIME::Inst_SMEM__S_MEMTIME(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_memtime")
    {
        // s_memtime does not issue a memory request
        setFlag(ALU);
    } // Inst_SMEM__S_MEMTIME

    Inst_SMEM__S_MEMTIME::~Inst_SMEM__S_MEMTIME()
    {
    } // ~Inst_SMEM__S_MEMTIME

    // --- description from .arch file ---
    // Return current 64-bit timestamp.
    void
    Inst_SMEM__S_MEMTIME::execute(GPUDynInstPtr gpuDynInst)
    {
        // The "timestamp" is modeled as the compute unit's current cycle
        // count, written to the 64-bit destination SGPR pair.
        ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
        sdst = (ScalarRegU64)gpuDynInst->computeUnit()->curCycle();
        sdst.write();
    } // execute
    // --- Inst_SMEM__S_MEMREALTIME class methods ---

    Inst_SMEM__S_MEMREALTIME::Inst_SMEM__S_MEMREALTIME(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_memrealtime")
    {
    } // Inst_SMEM__S_MEMREALTIME

    Inst_SMEM__S_MEMREALTIME::~Inst_SMEM__S_MEMREALTIME()
    {
    } // ~Inst_SMEM__S_MEMREALTIME

    // --- description from .arch file ---
    // Return current 64-bit RTC.
+ void + Inst_SMEM__S_MEMREALTIME::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SMEM__S_ATC_PROBE class methods --- + + Inst_SMEM__S_ATC_PROBE::Inst_SMEM__S_ATC_PROBE(InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_atc_probe") + { + } // Inst_SMEM__S_ATC_PROBE + + Inst_SMEM__S_ATC_PROBE::~Inst_SMEM__S_ATC_PROBE() + { + } // ~Inst_SMEM__S_ATC_PROBE + + // --- description from .arch file --- + // Probe or prefetch an address into the SQC data cache. + void + Inst_SMEM__S_ATC_PROBE::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SMEM__S_ATC_PROBE_BUFFER class methods --- + + Inst_SMEM__S_ATC_PROBE_BUFFER::Inst_SMEM__S_ATC_PROBE_BUFFER( + InFmt_SMEM *iFmt) + : Inst_SMEM(iFmt, "s_atc_probe_buffer") + { + } // Inst_SMEM__S_ATC_PROBE_BUFFER + + Inst_SMEM__S_ATC_PROBE_BUFFER::~Inst_SMEM__S_ATC_PROBE_BUFFER() + { + } // ~Inst_SMEM__S_ATC_PROBE_BUFFER + + // --- description from .arch file --- + // Probe or prefetch an address into the SQC data cache. + void + Inst_SMEM__S_ATC_PROBE_BUFFER::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/sop1.cc b/src/arch/amdgpu/vega/insts/sop1.cc new file mode 100644 index 0000000000..fa9a103e39 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/sop1.cc @@ -0,0 +1,1504 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/inst_util.hh" +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_SOP1__S_MOV_B32 class methods --- + + Inst_SOP1__S_MOV_B32::Inst_SOP1__S_MOV_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_mov_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_MOV_B32 + + Inst_SOP1__S_MOV_B32::~Inst_SOP1__S_MOV_B32() + { + } // ~Inst_SOP1__S_MOV_B32 + + // --- description from .arch file --- + // D.u = S0.u. 
    void
    Inst_SOP1__S_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        // D.u = S0.u — plain 32-bit scalar register move; SCC unaffected.
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = src.rawData();

        sdst.write();
    } // execute
    // --- Inst_SOP1__S_MOV_B64 class methods ---

    Inst_SOP1__S_MOV_B64::Inst_SOP1__S_MOV_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_mov_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_MOV_B64

    Inst_SOP1__S_MOV_B64::~Inst_SOP1__S_MOV_B64()
    {
    } // ~Inst_SOP1__S_MOV_B64

    // --- description from .arch file ---
    // D.u64 = S0.u64.
    void
    Inst_SOP1__S_MOV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // 64-bit scalar register-pair move; SCC unaffected.
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = src.rawData();

        sdst.write();
    } // execute
    // --- Inst_SOP1__S_CMOV_B32 class methods ---

    Inst_SOP1__S_CMOV_B32::Inst_SOP1__S_CMOV_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_cmov_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_CMOV_B32

    Inst_SOP1__S_CMOV_B32::~Inst_SOP1__S_CMOV_B32()
    {
    } // ~Inst_SOP1__S_CMOV_B32

    // --- description from .arch file ---
    // (SCC) then D.u = S0.u;
    // else NOP.
    // Conditional move.
    void
    Inst_SOP1__S_CMOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();
        scc.read();

        // Move only when SCC is set; otherwise the destination (and SCC)
        // are left untouched.
        if (scc.rawData()) {
            sdst = src.rawData();
            sdst.write();
        }
    } // execute
    // --- Inst_SOP1__S_CMOV_B64 class methods ---

    Inst_SOP1__S_CMOV_B64::Inst_SOP1__S_CMOV_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_cmov_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_CMOV_B64

    Inst_SOP1__S_CMOV_B64::~Inst_SOP1__S_CMOV_B64()
    {
    } // ~Inst_SOP1__S_CMOV_B64

    // --- description from .arch file ---
    // if (SCC) then D.u64 = S0.u64;
    // else NOP.
    // Conditional move.
    void
    Inst_SOP1__S_CMOV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();
        scc.read();

        // Move only when SCC is set; otherwise destination is untouched.
        if (scc.rawData()) {
            sdst = src.rawData();
            sdst.write();
        }
    } // execute
    // --- Inst_SOP1__S_NOT_B32 class methods ---

    Inst_SOP1__S_NOT_B32::Inst_SOP1__S_NOT_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_not_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_NOT_B32

    Inst_SOP1__S_NOT_B32::~Inst_SOP1__S_NOT_B32()
    {
    } // ~Inst_SOP1__S_NOT_B32

    // --- description from .arch file ---
    // D.u = ~S0.u;
    // SCC = 1 if result is non-zero.
    // Bitwise negation.
    void
    Inst_SOP1__S_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = ~src.rawData();

        // SCC reflects whether the result is non-zero.
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_NOT_B64 class methods ---

    Inst_SOP1__S_NOT_B64::Inst_SOP1__S_NOT_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_not_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_NOT_B64

    Inst_SOP1__S_NOT_B64::~Inst_SOP1__S_NOT_B64()
    {
    } // ~Inst_SOP1__S_NOT_B64

    // --- description from .arch file ---
    // D.u64 = ~S0.u64;
    // SCC = 1 if result is non-zero.
    // Bitwise negation.
    void
    Inst_SOP1__S_NOT_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = ~src.rawData();
        // SCC reflects whether the result is non-zero.
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_WQM_B32 class methods ---

    Inst_SOP1__S_WQM_B32::Inst_SOP1__S_WQM_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_wqm_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_WQM_B32

    Inst_SOP1__S_WQM_B32::~Inst_SOP1__S_WQM_B32()
    {
    } // ~Inst_SOP1__S_WQM_B32

    // --- description from .arch file ---
    // D[i] = (S0[(i & ~3):(i | 3)] != 0);
    // Computes whole quad mode for an active/valid mask.
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_WQM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        // wholeQuadMode() is a shared helper (inst_util.hh).
        sdst = wholeQuadMode(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_WQM_B64 class methods ---

    Inst_SOP1__S_WQM_B64::Inst_SOP1__S_WQM_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_wqm_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_WQM_B64

    Inst_SOP1__S_WQM_B64::~Inst_SOP1__S_WQM_B64()
    {
    } // ~Inst_SOP1__S_WQM_B64

    // --- description from .arch file ---
    // D[i] = (S0[(i & ~3):(i | 3)] != 0);
    // Computes whole quad mode for an active/valid mask.
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_WQM_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wholeQuadMode(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_BREV_B32 class methods ---

    Inst_SOP1__S_BREV_B32::Inst_SOP1__S_BREV_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_brev_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BREV_B32

    Inst_SOP1__S_BREV_B32::~Inst_SOP1__S_BREV_B32()
    {
    } // ~Inst_SOP1__S_BREV_B32

    // --- description from .arch file ---
    // D.u[31:0] = S0.u[0:31] (reverse bits).
    void
    Inst_SOP1__S_BREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Bit reversal; note SCC is NOT modified by this instruction.
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = reverseBits(src.rawData());

        sdst.write();
    } // execute
    // --- Inst_SOP1__S_BREV_B64 class methods ---

    Inst_SOP1__S_BREV_B64::Inst_SOP1__S_BREV_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_brev_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BREV_B64

    Inst_SOP1__S_BREV_B64::~Inst_SOP1__S_BREV_B64()
    {
    } // ~Inst_SOP1__S_BREV_B64

    // --- description from .arch file ---
    // D.u64[63:0] = S0.u64[0:63] (reverse bits).
    void
    Inst_SOP1__S_BREV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // 64-bit bit reversal; SCC is NOT modified.
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = reverseBits(src.rawData());

        sdst.write();
    } // execute
    // --- Inst_SOP1__S_BCNT0_I32_B32 class methods ---

    Inst_SOP1__S_BCNT0_I32_B32::Inst_SOP1__S_BCNT0_I32_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bcnt0_i32_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BCNT0_I32_B32

    Inst_SOP1__S_BCNT0_I32_B32::~Inst_SOP1__S_BCNT0_I32_B32()
    {
    } // ~Inst_SOP1__S_BCNT0_I32_B32

    // --- description from .arch file ---
    // D.i = CountZeroBits(S0.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_BCNT0_I32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        // countZeroBits() is an ISA utility helper; result is staged in the
        // destination operand so SCC can be derived from it before write-back.
        sdst = countZeroBits(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_BCNT0_I32_B64 class methods ---

    Inst_SOP1__S_BCNT0_I32_B64::Inst_SOP1__S_BCNT0_I32_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bcnt0_i32_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BCNT0_I32_B64

    Inst_SOP1__S_BCNT0_I32_B64::~Inst_SOP1__S_BCNT0_I32_B64()
    {
    } // ~Inst_SOP1__S_BCNT0_I32_B64

    // --- description from .arch file ---
    // D.i = CountZeroBits(S0.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_BCNT0_I32_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = countZeroBits(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_BCNT1_I32_B32 class methods ---

    Inst_SOP1__S_BCNT1_I32_B32::Inst_SOP1__S_BCNT1_I32_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bcnt1_i32_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BCNT1_I32_B32

    Inst_SOP1__S_BCNT1_I32_B32::~Inst_SOP1__S_BCNT1_I32_B32()
    {
    } // ~Inst_SOP1__S_BCNT1_I32_B32

    // --- description from .arch file ---
    // D.i = CountOneBits(S0.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_BCNT1_I32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        // popCount() counts the set bits (the CountOneBits of the pseudo-code).
        sdst = popCount(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_BCNT1_I32_B64 class methods ---

    Inst_SOP1__S_BCNT1_I32_B64::Inst_SOP1__S_BCNT1_I32_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bcnt1_i32_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BCNT1_I32_B64

    Inst_SOP1__S_BCNT1_I32_B64::~Inst_SOP1__S_BCNT1_I32_B64()
    {
    } // ~Inst_SOP1__S_BCNT1_I32_B64

    // --- description from .arch file ---
    // D.i = CountOneBits(S0.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_BCNT1_I32_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = popCount(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_FF0_I32_B32 class methods ---

    Inst_SOP1__S_FF0_I32_B32::Inst_SOP1__S_FF0_I32_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_ff0_i32_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_FF0_I32_B32

    Inst_SOP1__S_FF0_I32_B32::~Inst_SOP1__S_FF0_I32_B32()
    {
    } // ~Inst_SOP1__S_FF0_I32_B32

    // --- description from .arch file ---
    // D.i = FindFirstZero(S0.u);
    // If no zeros are found, return -1.
    // Returns the bit position of the first zero from the LSB.
+ void + Inst_SOP1__S_FF0_I32_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = findFirstZero(src.rawData()); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_FF0_I32_B64 class methods --- + + Inst_SOP1__S_FF0_I32_B64::Inst_SOP1__S_FF0_I32_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_ff0_i32_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_FF0_I32_B64 + + Inst_SOP1__S_FF0_I32_B64::~Inst_SOP1__S_FF0_I32_B64() + { + } // ~Inst_SOP1__S_FF0_I32_B64 + + // --- description from .arch file --- + // D.i = FindFirstZero(S0.u64); + // If no zeros are found, return -1. + // Returns the bit position of the first zero from the LSB. + void + Inst_SOP1__S_FF0_I32_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = findFirstZero(src.rawData()); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_FF1_I32_B32 class methods --- + + Inst_SOP1__S_FF1_I32_B32::Inst_SOP1__S_FF1_I32_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_ff1_i32_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_FF1_I32_B32 + + Inst_SOP1__S_FF1_I32_B32::~Inst_SOP1__S_FF1_I32_B32() + { + } // ~Inst_SOP1__S_FF1_I32_B32 + + // --- description from .arch file --- + // D.i = FindFirstOne(S0.u); + // If no ones are found, return -1. + // Returns the bit position of the first one from the LSB. 
+ void + Inst_SOP1__S_FF1_I32_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = findFirstOne(src.rawData()); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_FF1_I32_B64 class methods --- + + Inst_SOP1__S_FF1_I32_B64::Inst_SOP1__S_FF1_I32_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_ff1_i32_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_FF1_I32_B64 + + Inst_SOP1__S_FF1_I32_B64::~Inst_SOP1__S_FF1_I32_B64() + { + } // ~Inst_SOP1__S_FF1_I32_B64 + + // --- description from .arch file --- + // D.i = FindFirstOne(S0.u64); + // If no ones are found, return -1. + // Returns the bit position of the first one from the LSB. + void + Inst_SOP1__S_FF1_I32_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = findFirstOne(src.rawData()); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_FLBIT_I32_B32 class methods --- + + Inst_SOP1__S_FLBIT_I32_B32::Inst_SOP1__S_FLBIT_I32_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_flbit_i32_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_FLBIT_I32_B32 + + Inst_SOP1__S_FLBIT_I32_B32::~Inst_SOP1__S_FLBIT_I32_B32() + { + } // ~Inst_SOP1__S_FLBIT_I32_B32 + + // --- description from .arch file --- + // D.i = FindFirstOne(S0.u); + // If no ones are found, return -1. + // Counts how many zeros before the first one starting from the MSB. 
+ void + Inst_SOP1__S_FLBIT_I32_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = countZeroBitsMsb(src.rawData()); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_FLBIT_I32_B64 class methods --- + + Inst_SOP1__S_FLBIT_I32_B64::Inst_SOP1__S_FLBIT_I32_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_flbit_i32_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_FLBIT_I32_B64 + + Inst_SOP1__S_FLBIT_I32_B64::~Inst_SOP1__S_FLBIT_I32_B64() + { + } // ~Inst_SOP1__S_FLBIT_I32_B64 + + // --- description from .arch file --- + // D.i = FindFirstOne(S0.u64); + // If no ones are found, return -1. + // Counts how many zeros before the first one starting from the MSB. + void + Inst_SOP1__S_FLBIT_I32_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = countZeroBitsMsb(src.rawData()); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_FLBIT_I32 class methods --- + + Inst_SOP1__S_FLBIT_I32::Inst_SOP1__S_FLBIT_I32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_flbit_i32") + { + setFlag(ALU); + } // Inst_SOP1__S_FLBIT_I32 + + Inst_SOP1__S_FLBIT_I32::~Inst_SOP1__S_FLBIT_I32() + { + } // ~Inst_SOP1__S_FLBIT_I32 + + // --- description from .arch file --- + // D.i = FirstOppositeSignBit(S0.i); + // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1. + // Counts how many bits in a row (from MSB to LSB) are the same as the + // sign bit. 
+ void + Inst_SOP1__S_FLBIT_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = firstOppositeSignBit(src.rawData()); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_FLBIT_I32_I64 class methods --- + + Inst_SOP1__S_FLBIT_I32_I64::Inst_SOP1__S_FLBIT_I32_I64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_flbit_i32_i64") + { + setFlag(ALU); + } // Inst_SOP1__S_FLBIT_I32_I64 + + Inst_SOP1__S_FLBIT_I32_I64::~Inst_SOP1__S_FLBIT_I32_I64() + { + } // ~Inst_SOP1__S_FLBIT_I32_I64 + + // --- description from .arch file --- + // D.i = FirstOppositeSignBit(S0.i64); + // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1. + // Counts how many bits in a row (from MSB to LSB) are the same as the + // sign bit. + void + Inst_SOP1__S_FLBIT_I32_I64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI64 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = firstOppositeSignBit(src.rawData()); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_SEXT_I32_I8 class methods --- + + Inst_SOP1__S_SEXT_I32_I8::Inst_SOP1__S_SEXT_I32_I8(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_sext_i32_i8") + { + setFlag(ALU); + } // Inst_SOP1__S_SEXT_I32_I8 + + Inst_SOP1__S_SEXT_I32_I8::~Inst_SOP1__S_SEXT_I32_I8() + { + } // ~Inst_SOP1__S_SEXT_I32_I8 + + // --- description from .arch file --- + // D.i = signext(S0.i[7:0]) (sign extension). 
+ void + Inst_SOP1__S_SEXT_I32_I8::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = sext::digits>( + bits(src.rawData(), 7, 0)); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_SEXT_I32_I16 class methods --- + + Inst_SOP1__S_SEXT_I32_I16::Inst_SOP1__S_SEXT_I32_I16(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_sext_i32_i16") + { + setFlag(ALU); + } // Inst_SOP1__S_SEXT_I32_I16 + + Inst_SOP1__S_SEXT_I32_I16::~Inst_SOP1__S_SEXT_I32_I16() + { + } // ~Inst_SOP1__S_SEXT_I32_I16 + + // --- description from .arch file --- + // D.i = signext(S0.i[15:0]) (sign extension). + void + Inst_SOP1__S_SEXT_I32_I16::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = sext::digits>( + bits(src.rawData(), 15, 0)); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_BITSET0_B32 class methods --- + + Inst_SOP1__S_BITSET0_B32::Inst_SOP1__S_BITSET0_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_bitset0_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_BITSET0_B32 + + Inst_SOP1__S_BITSET0_B32::~Inst_SOP1__S_BITSET0_B32() + { + } // ~Inst_SOP1__S_BITSET0_B32 + + // --- description from .arch file --- + // D.u[S0.u[4:0]] = 0. 
+ void + Inst_SOP1__S_BITSET0_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst.setBit(bits(src.rawData(), 4, 0), 0); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_BITSET0_B64 class methods --- + + Inst_SOP1__S_BITSET0_B64::Inst_SOP1__S_BITSET0_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_bitset0_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_BITSET0_B64 + + Inst_SOP1__S_BITSET0_B64::~Inst_SOP1__S_BITSET0_B64() + { + } // ~Inst_SOP1__S_BITSET0_B64 + + // --- description from .arch file --- + // D.u64[S0.u[5:0]] = 0. + void + Inst_SOP1__S_BITSET0_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst.setBit(bits(src.rawData(), 5, 0), 0); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_BITSET1_B32 class methods --- + + Inst_SOP1__S_BITSET1_B32::Inst_SOP1__S_BITSET1_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_bitset1_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_BITSET1_B32 + + Inst_SOP1__S_BITSET1_B32::~Inst_SOP1__S_BITSET1_B32() + { + } // ~Inst_SOP1__S_BITSET1_B32 + + // --- description from .arch file --- + // D.u[S0.u[4:0]] = 1. + void + Inst_SOP1__S_BITSET1_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst.setBit(bits(src.rawData(), 4, 0), 1); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_BITSET1_B64 class methods --- + + Inst_SOP1__S_BITSET1_B64::Inst_SOP1__S_BITSET1_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_bitset1_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_BITSET1_B64 + + Inst_SOP1__S_BITSET1_B64::~Inst_SOP1__S_BITSET1_B64() + { + } // ~Inst_SOP1__S_BITSET1_B64 + + // --- description from .arch file --- + // D.u64[S0.u[5:0]] = 1. 
+ void + Inst_SOP1__S_BITSET1_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst.setBit(bits(src.rawData(), 5, 0), 1); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_GETPC_B64 class methods --- + + Inst_SOP1__S_GETPC_B64::Inst_SOP1__S_GETPC_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_getpc_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_GETPC_B64 + + Inst_SOP1__S_GETPC_B64::~Inst_SOP1__S_GETPC_B64() + { + } // ~Inst_SOP1__S_GETPC_B64 + + // --- description from .arch file --- + // D.u64 = PC + 4. + // Destination receives the byte address of the next instruction. + void + Inst_SOP1__S_GETPC_B64::execute(GPUDynInstPtr gpuDynInst) + { + Addr pc = gpuDynInst->pc(); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + + sdst = pc + 4; + + sdst.write(); + } // execute + // --- Inst_SOP1__S_SETPC_B64 class methods --- + + Inst_SOP1__S_SETPC_B64::Inst_SOP1__S_SETPC_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_setpc_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_SETPC_B64 + + Inst_SOP1__S_SETPC_B64::~Inst_SOP1__S_SETPC_B64() + { + } // ~Inst_SOP1__S_SETPC_B64 + + // --- description from .arch file --- + // PC = S0.u64. + // S0.u64 is a byte address of the instruction to jump to. + void + Inst_SOP1__S_SETPC_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + + src.read(); + + wf->pc(src.rawData()); + } // execute + // --- Inst_SOP1__S_SWAPPC_B64 class methods --- + + Inst_SOP1__S_SWAPPC_B64::Inst_SOP1__S_SWAPPC_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_swappc_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_SWAPPC_B64 + + Inst_SOP1__S_SWAPPC_B64::~Inst_SOP1__S_SWAPPC_B64() + { + } // ~Inst_SOP1__S_SWAPPC_B64 + + // --- description from .arch file --- + // D.u64 = PC + 4; PC = S0.u64. + // S0.u64 is a byte address of the instruction to jump to. 
    void
    Inst_SOP1__S_SWAPPC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = gpuDynInst->pc();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();

        // Return address: byte address of the next instruction.
        sdst = pc + 4;

        // SSRC0 was read before SDST is written, so the swap works even
        // when SDST and SSRC0 name the same register pair.
        wf->pc(src.rawData());
        sdst.write();
    } // execute
    // --- Inst_SOP1__S_RFE_B64 class methods ---

    Inst_SOP1__S_RFE_B64::Inst_SOP1__S_RFE_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_rfe_b64")
    {
    } // Inst_SOP1__S_RFE_B64

    Inst_SOP1__S_RFE_B64::~Inst_SOP1__S_RFE_B64()
    {
    } // ~Inst_SOP1__S_RFE_B64

    // --- description from .arch file ---
    // PRIV = 0;
    // PC = S0.u64.
    // Return from exception handler and continue.
    // This instruction may only be used within a trap handler.
    void
    Inst_SOP1__S_RFE_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        // Trap handlers are not modeled; executing this is a fatal error.
        panicUnimplemented();
    } // execute
    // --- Inst_SOP1__S_AND_SAVEEXEC_B64 class methods ---

    Inst_SOP1__S_AND_SAVEEXEC_B64::Inst_SOP1__S_AND_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_and_saveexec_b64")
    {
        setFlag(ALU);
        setFlag(ReadsEXEC);
        setFlag(WritesEXEC);
    } // Inst_SOP1__S_AND_SAVEEXEC_B64

    Inst_SOP1__S_AND_SAVEEXEC_B64::~Inst_SOP1__S_AND_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_AND_SAVEEXEC_B64

    // --- description from .arch file ---
    // D.u64 = EXEC;
    // EXEC = S0.u64 & EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_AND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        // Order matters: the old EXEC is captured into SDST before the
        // mask is updated; SCC then reflects the NEW mask.
        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() & wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_OR_SAVEEXEC_B64 class methods ---

    Inst_SOP1__S_OR_SAVEEXEC_B64::Inst_SOP1__S_OR_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_or_saveexec_b64")
    {
        setFlag(ALU);
        setFlag(ReadsEXEC);
        setFlag(WritesEXEC);
    } // Inst_SOP1__S_OR_SAVEEXEC_B64

    Inst_SOP1__S_OR_SAVEEXEC_B64::~Inst_SOP1__S_OR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_OR_SAVEEXEC_B64

    // --- description from .arch file ---
    // D.u64 = EXEC;
    // EXEC = S0.u64 | EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_OR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() | wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_XOR_SAVEEXEC_B64 class methods ---

    Inst_SOP1__S_XOR_SAVEEXEC_B64::Inst_SOP1__S_XOR_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_xor_saveexec_b64")
    {
        setFlag(ALU);
        setFlag(ReadsEXEC);
        setFlag(WritesEXEC);
    } // Inst_SOP1__S_XOR_SAVEEXEC_B64

    Inst_SOP1__S_XOR_SAVEEXEC_B64::~Inst_SOP1__S_XOR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_XOR_SAVEEXEC_B64

    // --- description from .arch file ---
    // D.u64 = EXEC;
    // EXEC = S0.u64 ^ EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_XOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        // Old EXEC is saved into SDST before the mask is updated; SCC is
        // taken from the NEW mask.
        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() ^ wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_ANDN2_SAVEEXEC_B64 class methods ---

    Inst_SOP1__S_ANDN2_SAVEEXEC_B64::Inst_SOP1__S_ANDN2_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_andn2_saveexec_b64")
    {
        setFlag(ALU);
        setFlag(ReadsEXEC);
        setFlag(WritesEXEC);
    } // Inst_SOP1__S_ANDN2_SAVEEXEC_B64

    Inst_SOP1__S_ANDN2_SAVEEXEC_B64::~Inst_SOP1__S_ANDN2_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_ANDN2_SAVEEXEC_B64

    // --- description from .arch file ---
    // D.u64 = EXEC;
    // EXEC = S0.u64 & ~EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_ANDN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        // "&~" is binary-AND with a complemented operand, i.e.
        // src & (~exec), matching the pseudo-code above.
        wf->execMask() = src.rawData() &~ wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_ORN2_SAVEEXEC_B64 class methods ---

    Inst_SOP1__S_ORN2_SAVEEXEC_B64::Inst_SOP1__S_ORN2_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_orn2_saveexec_b64")
    {
        setFlag(ALU);
        setFlag(ReadsEXEC);
        setFlag(WritesEXEC);
    } // Inst_SOP1__S_ORN2_SAVEEXEC_B64

    Inst_SOP1__S_ORN2_SAVEEXEC_B64::~Inst_SOP1__S_ORN2_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_ORN2_SAVEEXEC_B64

    // --- description from .arch file ---
    // D.u64 = EXEC;
    // EXEC = S0.u64 | ~EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_ORN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        // "|~" == src | (~exec), matching the pseudo-code above.
        wf->execMask() = src.rawData() |~ wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_NAND_SAVEEXEC_B64 class methods ---

    Inst_SOP1__S_NAND_SAVEEXEC_B64::Inst_SOP1__S_NAND_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_nand_saveexec_b64")
    {
        setFlag(ALU);
        setFlag(ReadsEXEC);
        setFlag(WritesEXEC);
    } // Inst_SOP1__S_NAND_SAVEEXEC_B64

    Inst_SOP1__S_NAND_SAVEEXEC_B64::~Inst_SOP1__S_NAND_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_NAND_SAVEEXEC_B64

    // --- description from .arch file ---
    // D.u64 = EXEC;
    // EXEC = ~(S0.u64 & EXEC);
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_NAND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        // Old EXEC captured before the update; SCC reflects the NEW mask.
        sdst = wf->execMask().to_ullong();
        wf->execMask() = ~(src.rawData() & wf->execMask().to_ullong());
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_NOR_SAVEEXEC_B64 class methods ---

    Inst_SOP1__S_NOR_SAVEEXEC_B64::Inst_SOP1__S_NOR_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_nor_saveexec_b64")
    {
        setFlag(ALU);
        setFlag(ReadsEXEC);
        setFlag(WritesEXEC);
    } // Inst_SOP1__S_NOR_SAVEEXEC_B64

    Inst_SOP1__S_NOR_SAVEEXEC_B64::~Inst_SOP1__S_NOR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_NOR_SAVEEXEC_B64

    // --- description from .arch file ---
    // D.u64 = EXEC;
    // EXEC = ~(S0.u64 | EXEC);
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_NOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = ~(src.rawData() | wf->execMask().to_ullong());
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_XNOR_SAVEEXEC_B64 class methods ---

    Inst_SOP1__S_XNOR_SAVEEXEC_B64::Inst_SOP1__S_XNOR_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_xnor_saveexec_b64")
    {
        setFlag(ALU);
        setFlag(ReadsEXEC);
        setFlag(WritesEXEC);
    } // Inst_SOP1__S_XNOR_SAVEEXEC_B64

    Inst_SOP1__S_XNOR_SAVEEXEC_B64::~Inst_SOP1__S_XNOR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_XNOR_SAVEEXEC_B64

    // --- description from .arch file ---
    // D.u64 = EXEC;
    // EXEC = ~(S0.u64 ^ EXEC);
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_XNOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = ~(src.rawData() ^ wf->execMask().to_ullong());
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_QUADMASK_B32 class methods ---

    Inst_SOP1__S_QUADMASK_B32::Inst_SOP1__S_QUADMASK_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_quadmask_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_QUADMASK_B32

    Inst_SOP1__S_QUADMASK_B32::~Inst_SOP1__S_QUADMASK_B32()
    {
    } // ~Inst_SOP1__S_QUADMASK_B32

    // --- description from .arch file ---
    // D.u = QuadMask(S0.u):
    // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[31:8] = 0;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_QUADMASK_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        // quadMask() is an ISA utility helper; per the pseudo-code above it
        // ORs each 4-bit group of the source into one result bit.
        sdst = quadMask(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_QUADMASK_B64 class methods ---

    Inst_SOP1__S_QUADMASK_B64::Inst_SOP1__S_QUADMASK_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_quadmask_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_QUADMASK_B64

    Inst_SOP1__S_QUADMASK_B64::~Inst_SOP1__S_QUADMASK_B64()
    {
    } // ~Inst_SOP1__S_QUADMASK_B64

    // --- description from .arch file ---
    // D.u64 = QuadMask(S0.u64):
    // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[63:16] = 0;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_QUADMASK_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = quadMask(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOP1__S_MOVRELS_B32 class methods ---

    Inst_SOP1__S_MOVRELS_B32::Inst_SOP1__S_MOVRELS_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_movrels_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_MOVRELS_B32

    Inst_SOP1__S_MOVRELS_B32::~Inst_SOP1__S_MOVRELS_B32()
    {
    } // ~Inst_SOP1__S_MOVRELS_B32

    // --- description from .arch file ---
    // D.u = SGPR[S0.u + M0.u].u (move from relative source).
+ void + Inst_SOP1__S_MOVRELS_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 m0(gpuDynInst, REG_M0); + m0.read(); + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0 + m0.rawData()); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = src.rawData(); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_MOVRELS_B64 class methods --- + + Inst_SOP1__S_MOVRELS_B64::Inst_SOP1__S_MOVRELS_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_movrels_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_MOVRELS_B64 + + Inst_SOP1__S_MOVRELS_B64::~Inst_SOP1__S_MOVRELS_B64() + { + } // ~Inst_SOP1__S_MOVRELS_B64 + + // --- description from .arch file --- + // D.u64 = SGPR[S0.u + M0.u].u64 (move from relative source). + // The index in M0.u must be even for this operation. + void + Inst_SOP1__S_MOVRELS_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 m0(gpuDynInst, REG_M0); + m0.read(); + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0 + m0.rawData()); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + + src.read(); + + sdst = src.rawData(); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_MOVRELD_B32 class methods --- + + Inst_SOP1__S_MOVRELD_B32::Inst_SOP1__S_MOVRELD_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_movreld_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_MOVRELD_B32 + + Inst_SOP1__S_MOVRELD_B32::~Inst_SOP1__S_MOVRELD_B32() + { + } // ~Inst_SOP1__S_MOVRELD_B32 + + // --- description from .arch file --- + // SGPR[D.u + M0.u].u = S0.u (move to relative destination). 
+ void + Inst_SOP1__S_MOVRELD_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 m0(gpuDynInst, REG_M0); + m0.read(); + ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST + m0.rawData()); + + src.read(); + + sdst = src.rawData(); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_MOVRELD_B64 class methods --- + + Inst_SOP1__S_MOVRELD_B64::Inst_SOP1__S_MOVRELD_B64(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_movreld_b64") + { + setFlag(ALU); + } // Inst_SOP1__S_MOVRELD_B64 + + Inst_SOP1__S_MOVRELD_B64::~Inst_SOP1__S_MOVRELD_B64() + { + } // ~Inst_SOP1__S_MOVRELD_B64 + + // --- description from .arch file --- + // SGPR[D.u + M0.u].u64 = S0.u64 (move to relative destination). + // The index in M0.u must be even for this operation. + void + Inst_SOP1__S_MOVRELD_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 m0(gpuDynInst, REG_M0); + m0.read(); + ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST + m0.rawData()); + + src.read(); + + sdst = src.rawData(); + + sdst.write(); + } // execute + // --- Inst_SOP1__S_CBRANCH_JOIN class methods --- + + Inst_SOP1__S_CBRANCH_JOIN::Inst_SOP1__S_CBRANCH_JOIN(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_cbranch_join") + { + setFlag(Branch); + setFlag(WritesEXEC); + } // Inst_SOP1__S_CBRANCH_JOIN + + Inst_SOP1__S_CBRANCH_JOIN::~Inst_SOP1__S_CBRANCH_JOIN() + { + } // ~Inst_SOP1__S_CBRANCH_JOIN + + // --- description from .arch file --- + // saved_csp = S0.u; + // if (CSP == saved_csp) then + // PC += 4; // Second time to JOIN: continue with program. + // else + // CSP -= 1; // First time to JOIN; jump to other FORK path. + // {PC, EXEC} = SGPR[CSP * 4]; // Read 128 bits from 4 consecutive + // SGPRs. + // end + // Conditional branch join point (end of conditional branch block). S0 is + // saved CSP value. + // See S_CBRANCH_G_FORK and S_CBRANCH_I_FORK for related instructions. 
+ void + Inst_SOP1__S_CBRANCH_JOIN::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOP1__S_ABS_I32 class methods --- + + Inst_SOP1__S_ABS_I32::Inst_SOP1__S_ABS_I32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_abs_i32") + { + setFlag(ALU); + } // Inst_SOP1__S_ABS_I32 + + Inst_SOP1__S_ABS_I32::~Inst_SOP1__S_ABS_I32() + { + } // ~Inst_SOP1__S_ABS_I32 + + // --- description from .arch file --- + // if (S.i < 0) then D.i = -S.i; + // else D.i = S.i; + // SCC = 1 if result is non-zero. + // Integer absolute value. + void + Inst_SOP1__S_ABS_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + sdst = std::abs(src.rawData()); + + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP1__S_MOV_FED_B32 class methods --- + + Inst_SOP1__S_MOV_FED_B32::Inst_SOP1__S_MOV_FED_B32(InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_mov_fed_b32") + { + setFlag(ALU); + } // Inst_SOP1__S_MOV_FED_B32 + + Inst_SOP1__S_MOV_FED_B32::~Inst_SOP1__S_MOV_FED_B32() + { + } // ~Inst_SOP1__S_MOV_FED_B32 + + // --- description from .arch file --- + // D.u = S0.u. Introduce an EDC double-detect error on write to the + // destination SGPR. + void + Inst_SOP1__S_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOP1__S_SET_GPR_IDX_IDX class methods --- + + Inst_SOP1__S_SET_GPR_IDX_IDX::Inst_SOP1__S_SET_GPR_IDX_IDX( + InFmt_SOP1 *iFmt) + : Inst_SOP1(iFmt, "s_set_gpr_idx_idx") + { + } // Inst_SOP1__S_SET_GPR_IDX_IDX + + Inst_SOP1__S_SET_GPR_IDX_IDX::~Inst_SOP1__S_SET_GPR_IDX_IDX() + { + } // ~Inst_SOP1__S_SET_GPR_IDX_IDX + + // --- description from .arch file --- + // M0[7:0] = S0.u[7:0]. + // Modify the index used in vector GPR indexing. 
+ void + Inst_SOP1__S_SET_GPR_IDX_IDX::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/sop2.cc b/src/arch/amdgpu/vega/insts/sop2.cc new file mode 100644 index 0000000000..a2965763f7 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/sop2.cc @@ -0,0 +1,1556 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_SOP2__S_ADD_U32 class methods --- + + Inst_SOP2__S_ADD_U32::Inst_SOP2__S_ADD_U32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_add_u32") + { + setFlag(ALU); + } // Inst_SOP2__S_ADD_U32 + + Inst_SOP2__S_ADD_U32::~Inst_SOP2__S_ADD_U32() + { + } // ~Inst_SOP2__S_ADD_U32 + + // --- description from .arch file --- + // D.u = S0.u + S1.u; + // SCC = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an unsigned + // --- overflow/carry-out for S_ADDC_U32. + void + Inst_SOP2__S_ADD_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() + src1.rawData(); + scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData()) + >= 0x100000000ULL ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_SUB_U32 class methods --- + + Inst_SOP2__S_SUB_U32::Inst_SOP2__S_SUB_U32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_sub_u32") + { + setFlag(ALU); + } // Inst_SOP2__S_SUB_U32 + + Inst_SOP2__S_SUB_U32::~Inst_SOP2__S_SUB_U32() + { + } // ~Inst_SOP2__S_SUB_U32 + + // --- description from .arch file --- + // D.u = S0.u - S1.u; + // SCC = (S1.u > S0.u ? 1 : 0) is an unsigned overflow or carry-out for + // --- S_SUBB_U32. + void + Inst_SOP2__S_SUB_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() - src1.rawData(); + scc = (src1.rawData() > src0.rawData()) ? 
1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ADD_I32 class methods --- + + Inst_SOP2__S_ADD_I32::Inst_SOP2__S_ADD_I32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_add_i32") + { + setFlag(ALU); + } // Inst_SOP2__S_ADD_I32 + + Inst_SOP2__S_ADD_I32::~Inst_SOP2__S_ADD_I32() + { + } // ~Inst_SOP2__S_ADD_I32 + + // --- description from .arch file --- + // D.i = S0.i + S1.i; + // SCC = (S0.u[31] == S1.u[31] && S0.u[31] != D.u[31]) is a signed + // overflow. + // This opcode is not suitable for use with S_ADDC_U32 for implementing + // 64-bit operations. + void + Inst_SOP2__S_ADD_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() + src1.rawData(); + scc = (bits(src0.rawData(), 31) == bits(src1.rawData(), 31) + && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) + ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_SUB_I32 class methods --- + + Inst_SOP2__S_SUB_I32::Inst_SOP2__S_SUB_I32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_sub_i32") + { + setFlag(ALU); + } // Inst_SOP2__S_SUB_I32 + + Inst_SOP2__S_SUB_I32::~Inst_SOP2__S_SUB_I32() + { + } // ~Inst_SOP2__S_SUB_I32 + + // --- description from .arch file --- + // D.i = S0.i - S1.i; + // SCC = (S0.u[31] != S1.u[31] && S0.u[31] != D.u[31]) is a signed + // overflow. + // CAUTION: The condition code behaviour for this opcode is inconsistent + // with V_SUB_I32; see V_SUB_I32 for further details. + // This opcode is not suitable for use with S_SUBB_U32 for implementing + // 64-bit operations. 
+ void + Inst_SOP2__S_SUB_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() - src1.rawData(); + scc = (bits(src0.rawData(), 31) != bits(src1.rawData(), 31) + && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ADDC_U32 class methods --- + + Inst_SOP2__S_ADDC_U32::Inst_SOP2__S_ADDC_U32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_addc_u32") + { + setFlag(ALU); + } // Inst_SOP2__S_ADDC_U32 + + Inst_SOP2__S_ADDC_U32::~Inst_SOP2__S_ADDC_U32() + { + } // ~Inst_SOP2__S_ADDC_U32 + + // --- description from .arch file --- + // D.u = S0.u + S1.u + SCC; + // SCC = (S0.u + S1.u + SCC >= 0x800000000ULL ? 1 : 0) is an unsigned + // overflow. + void + Inst_SOP2__S_ADDC_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + scc.read(); + + sdst = src0.rawData() + src1.rawData() + scc.rawData(); + scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData() + + (ScalarRegU64)scc.rawData()) >= 0x100000000ULL ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_SUBB_U32 class methods --- + + Inst_SOP2__S_SUBB_U32::Inst_SOP2__S_SUBB_U32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_subb_u32") + { + setFlag(ALU); + } // Inst_SOP2__S_SUBB_U32 + + Inst_SOP2__S_SUBB_U32::~Inst_SOP2__S_SUBB_U32() + { + } // ~Inst_SOP2__S_SUBB_U32 + + // --- description from .arch file --- + // D.u = S0.u - S1.u - SCC; + // SCC = (S1.u + SCC > S0.u ? 1 : 0) is an unsigned overflow. 
+ void + Inst_SOP2__S_SUBB_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + scc.read(); + + sdst = src0.rawData() - src1.rawData() - scc.rawData(); + scc = (src1.rawData() + scc.rawData()) > src0.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_MIN_I32 class methods --- + + Inst_SOP2__S_MIN_I32::Inst_SOP2__S_MIN_I32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_min_i32") + { + setFlag(ALU); + } // Inst_SOP2__S_MIN_I32 + + Inst_SOP2__S_MIN_I32::~Inst_SOP2__S_MIN_I32() + { + } // ~Inst_SOP2__S_MIN_I32 + + // --- description from .arch file --- + // D.i = (S0.i < S1.i) ? S0.i : S1.i; + // SCC = 1 if S0 is chosen as the minimum value. + void + Inst_SOP2__S_MIN_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = std::min(src0.rawData(), src1.rawData()); + scc = (src0.rawData() < src1.rawData()) ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_MIN_U32 class methods --- + + Inst_SOP2__S_MIN_U32::Inst_SOP2__S_MIN_U32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_min_u32") + { + setFlag(ALU); + } // Inst_SOP2__S_MIN_U32 + + Inst_SOP2__S_MIN_U32::~Inst_SOP2__S_MIN_U32() + { + } // ~Inst_SOP2__S_MIN_U32 + + // --- description from .arch file --- + // D.u = (S0.u < S1.u) ? S0.u : S1.u; + // SCC = 1 if S0 is chosen as the minimum value. 
+ void + Inst_SOP2__S_MIN_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = std::min(src0.rawData(), src1.rawData()); + scc = (src0.rawData() < src1.rawData()) ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_MAX_I32 class methods --- + + Inst_SOP2__S_MAX_I32::Inst_SOP2__S_MAX_I32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_max_i32") + { + setFlag(ALU); + } // Inst_SOP2__S_MAX_I32 + + Inst_SOP2__S_MAX_I32::~Inst_SOP2__S_MAX_I32() + { + } // ~Inst_SOP2__S_MAX_I32 + + // --- description from .arch file --- + // D.i = (S0.i > S1.i) ? S0.i : S1.i; + // SCC = 1 if S0 is chosen as the maximum value. + void + Inst_SOP2__S_MAX_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = std::max(src0.rawData(), src1.rawData()); + scc = (src0.rawData() > src1.rawData()) ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_MAX_U32 class methods --- + + Inst_SOP2__S_MAX_U32::Inst_SOP2__S_MAX_U32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_max_u32") + { + setFlag(ALU); + } // Inst_SOP2__S_MAX_U32 + + Inst_SOP2__S_MAX_U32::~Inst_SOP2__S_MAX_U32() + { + } // ~Inst_SOP2__S_MAX_U32 + + // --- description from .arch file --- + // D.u = (S0.u > S1.u) ? S0.u : S1.u; + // SCC = 1 if S0 is chosen as the maximum value. 
+ void + Inst_SOP2__S_MAX_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = std::max(src0.rawData(), src1.rawData()); + scc = (src0.rawData() > src1.rawData()) ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_CSELECT_B32 class methods --- + + Inst_SOP2__S_CSELECT_B32::Inst_SOP2__S_CSELECT_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_cselect_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_CSELECT_B32 + + Inst_SOP2__S_CSELECT_B32::~Inst_SOP2__S_CSELECT_B32() + { + } // ~Inst_SOP2__S_CSELECT_B32 + + // --- description from .arch file --- + // D.u = SCC ? S0.u : S1.u (conditional select). + void + Inst_SOP2__S_CSELECT_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + scc.read(); + + sdst = scc.rawData() ? src0.rawData() : src1.rawData(); + + sdst.write(); + } // execute + // --- Inst_SOP2__S_CSELECT_B64 class methods --- + + Inst_SOP2__S_CSELECT_B64::Inst_SOP2__S_CSELECT_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_cselect_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_CSELECT_B64 + + Inst_SOP2__S_CSELECT_B64::~Inst_SOP2__S_CSELECT_B64() + { + } // ~Inst_SOP2__S_CSELECT_B64 + + // --- description from .arch file --- + // D.u64 = SCC ? S0.u64 : S1.u64 (conditional select). 
+ void + Inst_SOP2__S_CSELECT_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + scc.read(); + + sdst = scc.rawData() ? src0.rawData() : src1.rawData(); + + sdst.write(); + } // execute + // --- Inst_SOP2__S_AND_B32 class methods --- + + Inst_SOP2__S_AND_B32::Inst_SOP2__S_AND_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_and_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_AND_B32 + + Inst_SOP2__S_AND_B32::~Inst_SOP2__S_AND_B32() + { + } // ~Inst_SOP2__S_AND_B32 + + // --- description from .arch file --- + // D.u = S0.u & S1.u; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_AND_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() & src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_AND_B64 class methods --- + + Inst_SOP2__S_AND_B64::Inst_SOP2__S_AND_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_and_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_AND_B64 + + Inst_SOP2__S_AND_B64::~Inst_SOP2__S_AND_B64() + { + } // ~Inst_SOP2__S_AND_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64 & S1.u64; + // SCC = 1 if result is non-zero. 
+ void + Inst_SOP2__S_AND_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() & src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_OR_B32 class methods --- + + Inst_SOP2__S_OR_B32::Inst_SOP2__S_OR_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_or_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_OR_B32 + + Inst_SOP2__S_OR_B32::~Inst_SOP2__S_OR_B32() + { + } // ~Inst_SOP2__S_OR_B32 + + // --- description from .arch file --- + // D.u = S0.u | S1.u; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_OR_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() | src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_OR_B64 class methods --- + + Inst_SOP2__S_OR_B64::Inst_SOP2__S_OR_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_or_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_OR_B64 + + Inst_SOP2__S_OR_B64::~Inst_SOP2__S_OR_B64() + { + } // ~Inst_SOP2__S_OR_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64 | S1.u64; + // SCC = 1 if result is non-zero. 
+ void + Inst_SOP2__S_OR_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() | src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_XOR_B32 class methods --- + + Inst_SOP2__S_XOR_B32::Inst_SOP2__S_XOR_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_xor_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_XOR_B32 + + Inst_SOP2__S_XOR_B32::~Inst_SOP2__S_XOR_B32() + { + } // ~Inst_SOP2__S_XOR_B32 + + // --- description from .arch file --- + // D.u = S0.u ^ S1.u; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_XOR_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() ^ src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_XOR_B64 class methods --- + + Inst_SOP2__S_XOR_B64::Inst_SOP2__S_XOR_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_xor_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_XOR_B64 + + Inst_SOP2__S_XOR_B64::~Inst_SOP2__S_XOR_B64() + { + } // ~Inst_SOP2__S_XOR_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64 ^ S1.u64; + // SCC = 1 if result is non-zero. 
+ void + Inst_SOP2__S_XOR_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() ^ src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ANDN2_B32 class methods --- + + Inst_SOP2__S_ANDN2_B32::Inst_SOP2__S_ANDN2_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_andn2_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_ANDN2_B32 + + Inst_SOP2__S_ANDN2_B32::~Inst_SOP2__S_ANDN2_B32() + { + } // ~Inst_SOP2__S_ANDN2_B32 + + // --- description from .arch file --- + // D.u = S0.u & ~S1.u; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_ANDN2_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() &~ src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ANDN2_B64 class methods --- + + Inst_SOP2__S_ANDN2_B64::Inst_SOP2__S_ANDN2_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_andn2_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_ANDN2_B64 + + Inst_SOP2__S_ANDN2_B64::~Inst_SOP2__S_ANDN2_B64() + { + } // ~Inst_SOP2__S_ANDN2_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64 & ~S1.u64; + // SCC = 1 if result is non-zero. 
+ void + Inst_SOP2__S_ANDN2_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() &~ src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ORN2_B32 class methods --- + + Inst_SOP2__S_ORN2_B32::Inst_SOP2__S_ORN2_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_orn2_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_ORN2_B32 + + Inst_SOP2__S_ORN2_B32::~Inst_SOP2__S_ORN2_B32() + { + } // ~Inst_SOP2__S_ORN2_B32 + + // --- description from .arch file --- + // D.u = S0.u | ~S1.u; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_ORN2_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() |~ src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ORN2_B64 class methods --- + + Inst_SOP2__S_ORN2_B64::Inst_SOP2__S_ORN2_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_orn2_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_ORN2_B64 + + Inst_SOP2__S_ORN2_B64::~Inst_SOP2__S_ORN2_B64() + { + } // ~Inst_SOP2__S_ORN2_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64 | ~S1.u64; + // SCC = 1 if result is non-zero. 
+ void + Inst_SOP2__S_ORN2_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = src0.rawData() |~ src1.rawData(); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_NAND_B32 class methods --- + + Inst_SOP2__S_NAND_B32::Inst_SOP2__S_NAND_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_nand_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_NAND_B32 + + Inst_SOP2__S_NAND_B32::~Inst_SOP2__S_NAND_B32() + { + } // ~Inst_SOP2__S_NAND_B32 + + // --- description from .arch file --- + // D.u = ~(S0.u & S1.u); + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_NAND_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = ~(src0.rawData() & src1.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_NAND_B64 class methods --- + + Inst_SOP2__S_NAND_B64::Inst_SOP2__S_NAND_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_nand_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_NAND_B64 + + Inst_SOP2__S_NAND_B64::~Inst_SOP2__S_NAND_B64() + { + } // ~Inst_SOP2__S_NAND_B64 + + // --- description from .arch file --- + // D.u64 = ~(S0.u64 & S1.u64); + // SCC = 1 if result is non-zero. 
+ void + Inst_SOP2__S_NAND_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = ~(src0.rawData() & src1.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_NOR_B32 class methods --- + + Inst_SOP2__S_NOR_B32::Inst_SOP2__S_NOR_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_nor_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_NOR_B32 + + Inst_SOP2__S_NOR_B32::~Inst_SOP2__S_NOR_B32() + { + } // ~Inst_SOP2__S_NOR_B32 + + // --- description from .arch file --- + // D.u = ~(S0.u | S1.u); + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_NOR_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = ~(src0.rawData() | src1.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_NOR_B64 class methods --- + + Inst_SOP2__S_NOR_B64::Inst_SOP2__S_NOR_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_nor_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_NOR_B64 + + Inst_SOP2__S_NOR_B64::~Inst_SOP2__S_NOR_B64() + { + } // ~Inst_SOP2__S_NOR_B64 + + // --- description from .arch file --- + // D.u64 = ~(S0.u64 | S1.u64); + // SCC = 1 if result is non-zero. 
+ void + Inst_SOP2__S_NOR_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = ~(src0.rawData() | src1.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_XNOR_B32 class methods --- + + Inst_SOP2__S_XNOR_B32::Inst_SOP2__S_XNOR_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_xnor_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_XNOR_B32 + + Inst_SOP2__S_XNOR_B32::~Inst_SOP2__S_XNOR_B32() + { + } // ~Inst_SOP2__S_XNOR_B32 + + // --- description from .arch file --- + // D.u = ~(S0.u ^ S1.u); + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_XNOR_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = ~(src0.rawData() ^ src1.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_XNOR_B64 class methods --- + + Inst_SOP2__S_XNOR_B64::Inst_SOP2__S_XNOR_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_xnor_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_XNOR_B64 + + Inst_SOP2__S_XNOR_B64::~Inst_SOP2__S_XNOR_B64() + { + } // ~Inst_SOP2__S_XNOR_B64 + + // --- description from .arch file --- + // D.u64 = ~(S0.u64 ^ S1.u64); + // SCC = 1 if result is non-zero. 
+ void + Inst_SOP2__S_XNOR_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = ~(src0.rawData() ^ src1.rawData()); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_LSHL_B32 class methods --- + + Inst_SOP2__S_LSHL_B32::Inst_SOP2__S_LSHL_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_lshl_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_LSHL_B32 + + Inst_SOP2__S_LSHL_B32::~Inst_SOP2__S_LSHL_B32() + { + } // ~Inst_SOP2__S_LSHL_B32 + + // --- description from .arch file --- + // D.u = S0.u << S1.u[4:0]; + // SCC = 1 if result is non-zero. + void + Inst_SOP2__S_LSHL_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = (src0.rawData() << bits(src1.rawData(), 4, 0)); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_LSHL_B64 class methods --- + + Inst_SOP2__S_LSHL_B64::Inst_SOP2__S_LSHL_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_lshl_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_LSHL_B64 + + Inst_SOP2__S_LSHL_B64::~Inst_SOP2__S_LSHL_B64() + { + } // ~Inst_SOP2__S_LSHL_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64 << S1.u[5:0]; + // SCC = 1 if result is non-zero. 
+ void + Inst_SOP2__S_LSHL_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = (src0.rawData() << bits(src1.rawData(), 5, 0)); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_LSHR_B32 class methods --- + + Inst_SOP2__S_LSHR_B32::Inst_SOP2__S_LSHR_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_lshr_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_LSHR_B32 + + Inst_SOP2__S_LSHR_B32::~Inst_SOP2__S_LSHR_B32() + { + } // ~Inst_SOP2__S_LSHR_B32 + + // --- description from .arch file --- + // D.u = S0.u >> S1.u[4:0]; + // SCC = 1 if result is non-zero. + // The vacated bits are set to zero. + void + Inst_SOP2__S_LSHR_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_LSHR_B64 class methods --- + + Inst_SOP2__S_LSHR_B64::Inst_SOP2__S_LSHR_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_lshr_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_LSHR_B64 + + Inst_SOP2__S_LSHR_B64::~Inst_SOP2__S_LSHR_B64() + { + } // ~Inst_SOP2__S_LSHR_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64 >> S1.u[5:0]; + // SCC = 1 if result is non-zero. + // The vacated bits are set to zero. 
+ void + Inst_SOP2__S_LSHR_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ASHR_I32 class methods --- + + Inst_SOP2__S_ASHR_I32::Inst_SOP2__S_ASHR_I32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_ashr_i32") + { + setFlag(ALU); + } // Inst_SOP2__S_ASHR_I32 + + Inst_SOP2__S_ASHR_I32::~Inst_SOP2__S_ASHR_I32() + { + } // ~Inst_SOP2__S_ASHR_I32 + + // --- description from .arch file --- + // D.i = signext(S0.i) >> S1.u[4:0]; + // SCC = 1 if result is non-zero. + // The vacated bits are set to the sign bit of the input value. + void + Inst_SOP2__S_ASHR_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0)); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_ASHR_I64 class methods --- + + Inst_SOP2__S_ASHR_I64::Inst_SOP2__S_ASHR_I64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_ashr_i64") + { + setFlag(ALU); + } // Inst_SOP2__S_ASHR_I64 + + Inst_SOP2__S_ASHR_I64::~Inst_SOP2__S_ASHR_I64() + { + } // ~Inst_SOP2__S_ASHR_I64 + + // --- description from .arch file --- + // D.i64 = signext(S0.i64) >> S1.u[5:0]; + // SCC = 1 if result is non-zero. + // The vacated bits are set to the sign bit of the input value. 
+ void + Inst_SOP2__S_ASHR_I64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandI64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0)); + scc = sdst.rawData() ? 1 : 0; + + sdst.write(); + scc.write(); + } // execute + // --- Inst_SOP2__S_BFM_B32 class methods --- + + Inst_SOP2__S_BFM_B32::Inst_SOP2__S_BFM_B32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_bfm_b32") + { + setFlag(ALU); + } // Inst_SOP2__S_BFM_B32 + + Inst_SOP2__S_BFM_B32::~Inst_SOP2__S_BFM_B32() + { + } // ~Inst_SOP2__S_BFM_B32 + + // --- description from .arch file --- + // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0] (bitfield mask). + void + Inst_SOP2__S_BFM_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + + src0.read(); + src1.read(); + + sdst = ((1 << bits(src0.rawData(), 4, 0)) - 1) + << bits(src1.rawData(), 4, 0); + + sdst.write(); + } // execute + // --- Inst_SOP2__S_BFM_B64 class methods --- + + Inst_SOP2__S_BFM_B64::Inst_SOP2__S_BFM_B64(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_bfm_b64") + { + setFlag(ALU); + } // Inst_SOP2__S_BFM_B64 + + Inst_SOP2__S_BFM_B64::~Inst_SOP2__S_BFM_B64() + { + } // ~Inst_SOP2__S_BFM_B64 + + // --- description from .arch file --- + // D.u64 = ((1ULL << S0.u[5:0]) - 1) << S1.u[5:0] (bitfield mask). 
+    void
+    Inst_SOP2__S_BFM_B64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
+        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
+
+        src0.read();
+        src1.read();
+
+        // Bitfield mask: src0[5:0] is the field width, src1[5:0] the
+        // field offset. 1ULL keeps the shift well-defined for widths > 31.
+        sdst = ((1ULL << bits(src0.rawData(), 5, 0)) - 1)
+            << bits(src1.rawData(), 5, 0);
+
+        sdst.write();
+    } // execute
+    // --- Inst_SOP2__S_MUL_I32 class methods ---
+
+    Inst_SOP2__S_MUL_I32::Inst_SOP2__S_MUL_I32(InFmt_SOP2 *iFmt)
+        : Inst_SOP2(iFmt, "s_mul_i32")
+    {
+        setFlag(ALU);
+    } // Inst_SOP2__S_MUL_I32
+
+    Inst_SOP2__S_MUL_I32::~Inst_SOP2__S_MUL_I32()
+    {
+    } // ~Inst_SOP2__S_MUL_I32
+
+    // --- description from .arch file ---
+    // D.i = S0.i * S1.i.
+    void
+    Inst_SOP2__S_MUL_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
+        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
+        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
+
+        src0.read();
+        src1.read();
+
+        // Widen one operand before multiplying so the product is computed
+        // in 64 bits: a 32x32 signed multiply that overflows is undefined
+        // behavior in C++. Only the low 32 bits are the architected result.
+        ScalarRegI64 tmp = (ScalarRegI64)src0.rawData() * src1.rawData();
+        sdst = tmp & mask(32);
+
+        sdst.write();
+    } // execute
+    // --- Inst_SOP2__S_BFE_U32 class methods ---
+
+    Inst_SOP2__S_BFE_U32::Inst_SOP2__S_BFE_U32(InFmt_SOP2 *iFmt)
+        : Inst_SOP2(iFmt, "s_bfe_u32")
+    {
+        setFlag(ALU);
+    } // Inst_SOP2__S_BFE_U32
+
+    Inst_SOP2__S_BFE_U32::~Inst_SOP2__S_BFE_U32()
+    {
+    } // ~Inst_SOP2__S_BFE_U32
+
+    // --- description from .arch file ---
+    // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
+    // field width.
+    // D.u = (S0.u>>S1.u[4:0]) & ((1<<S1.u[22:16])-1).
+    // SCC = 1 if result is non-zero.
+    void
+    Inst_SOP2__S_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
+        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
+        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
+        ScalarOperandU32 scc(gpuDynInst, REG_SCC);
+
+        src0.read();
+        src1.read();
+
+        // NOTE(review): a width value >= 32 in src1[22:16] would make this
+        // shift undefined; assumed unreachable for a 32-bit extract -- TODO
+        // confirm against the ISA spec.
+        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
+            & ((1 << bits(src1.rawData(), 22, 16)) - 1);
+        scc = sdst.rawData() ? 1 : 0;
+
+        sdst.write();
+        scc.write();
+    } // execute
+    // --- Inst_SOP2__S_BFE_I32 class methods ---
+
+    Inst_SOP2__S_BFE_I32::Inst_SOP2__S_BFE_I32(InFmt_SOP2 *iFmt)
+        : Inst_SOP2(iFmt, "s_bfe_i32")
+    {
+        setFlag(ALU);
+    } // Inst_SOP2__S_BFE_I32
+
+    Inst_SOP2__S_BFE_I32::~Inst_SOP2__S_BFE_I32()
+    {
+    } // ~Inst_SOP2__S_BFE_I32
+
+    // --- description from .arch file ---
+    // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
+    // field width.
+    // D.i = (S0.i>>S1.u[4:0]) & ((1<<S1.u[22:16])-1).
+    // SCC = 1 if result is non-zero.
+    void
+    Inst_SOP2__S_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
+        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
+        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
+        ScalarOperandU32 scc(gpuDynInst, REG_SCC);
+
+        src0.read();
+        src1.read();
+
+        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
+            & ((1 << bits(src1.rawData(), 22, 16)) - 1);
+
+        // Above extracted a signed int of size src1[22:16] bits which needs
+        // to be sign-extended. Check if the MSB of our src1[22:16]-bit
+        // integer is 1, and sign extend if it is.
+        //
+        // Note: The description in the Vega ISA manual does not mention to
+        // sign-extend the result. An updated description can be found in the
+        // more recent RDNA3 manual here:
+        // https://developer.amd.com/wp-content/resources/
+        // RDNA3_Shader_ISA_December2022.pdf
+        if (sdst.rawData() >> (bits(src1.rawData(), 22, 16) - 1)) {
+            sdst = sdst.rawData()
+                | (0xffffffff << bits(src1.rawData(), 22, 16));
+        }
+
+        scc = sdst.rawData() ? 1 : 0;
+
+        sdst.write();
+        scc.write();
+    } // execute
+    // --- Inst_SOP2__S_BFE_U64 class methods ---
+
+    Inst_SOP2__S_BFE_U64::Inst_SOP2__S_BFE_U64(InFmt_SOP2 *iFmt)
+        : Inst_SOP2(iFmt, "s_bfe_u64")
+    {
+        setFlag(ALU);
+    } // Inst_SOP2__S_BFE_U64
+
+    Inst_SOP2__S_BFE_U64::~Inst_SOP2__S_BFE_U64()
+    {
+    } // ~Inst_SOP2__S_BFE_U64
+
+    // --- description from .arch file ---
+    // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
+    // field width.
+    // D.u64 = (S0.u64>>S1.u[5:0]) & ((1<<S1.u[22:16])-1).
+    // SCC = 1 if result is non-zero.
+    void
+    Inst_SOP2__S_BFE_U64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
+        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
+        ScalarOperandU32 scc(gpuDynInst, REG_SCC);
+
+        src0.read();
+        src1.read();
+
+        // 1ULL so the mask is computed in 64 bits: a plain int 1 shifted
+        // by a width of 32..63 is undefined and yields a wrong mask.
+        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
+            & ((1ULL << bits(src1.rawData(), 22, 16)) - 1);
+        scc = sdst.rawData() ? 1 : 0;
+
+        sdst.write();
+        scc.write();
+    } // execute
+    // --- Inst_SOP2__S_BFE_I64 class methods ---
+
+    Inst_SOP2__S_BFE_I64::Inst_SOP2__S_BFE_I64(InFmt_SOP2 *iFmt)
+        : Inst_SOP2(iFmt, "s_bfe_i64")
+    {
+        setFlag(ALU);
+    } // Inst_SOP2__S_BFE_I64
+
+    Inst_SOP2__S_BFE_I64::~Inst_SOP2__S_BFE_I64()
+    {
+    } // ~Inst_SOP2__S_BFE_I64
+
+    // --- description from .arch file ---
+    // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
+    // field width.
+    // D.i64 = (S0.i64>>S1.u[5:0]) & ((1<<S1.u[22:16])-1).
+    // SCC = 1 if result is non-zero.
+    void
+    Inst_SOP2__S_BFE_I64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
+        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
+        ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
+        ScalarOperandU32 scc(gpuDynInst, REG_SCC);
+
+        src0.read();
+        src1.read();
+
+        // 1LL so the mask is computed in 64 bits (see S_BFE_U64).
+        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
+            & ((1LL << bits(src1.rawData(), 22, 16)) - 1);
+
+        // Above extracted a signed int of size src1[22:16] bits which needs
+        // to be sign-extended. Check if the MSB of our src1[22:16]-bit
+        // integer is 1, and sign extend if it is.
+        if (sdst.rawData() >> (bits(src1.rawData(), 22, 16) - 1)) {
+            sdst = sdst.rawData()
+                | 0xffffffffffffffff << bits(src1.rawData(), 22, 16);
+        }
+        scc = sdst.rawData() ? 1 : 0;
+
+        sdst.write();
+        scc.write();
+    } // execute
+    // --- Inst_SOP2__S_CBRANCH_G_FORK class methods ---
+
+    Inst_SOP2__S_CBRANCH_G_FORK::Inst_SOP2__S_CBRANCH_G_FORK(InFmt_SOP2 *iFmt)
+        : Inst_SOP2(iFmt, "s_cbranch_g_fork")
+    {
+        setFlag(Branch);
+    } // Inst_SOP2__S_CBRANCH_G_FORK
+
+    Inst_SOP2__S_CBRANCH_G_FORK::~Inst_SOP2__S_CBRANCH_G_FORK()
+    {
+    } // ~Inst_SOP2__S_CBRANCH_G_FORK
+
+    // --- description from .arch file ---
+    // mask_pass = S0.u64 & EXEC;
+    // mask_fail = ~S0.u64 & EXEC;
+    // if (mask_pass == EXEC)
+    //   PC = S1.u64;
+    // elsif (mask_fail == EXEC)
+    //   PC += 4;
+    // elsif (bitcount(mask_fail) < bitcount(mask_pass))
+    //   EXEC = mask_fail;
+    //   SGPR[CSP*4] = { S1.u64, mask_pass };
+    //   CSP++;
+    //   PC += 4;
+    // else
+    //   EXEC = mask_pass;
+    //   SGPR[CSP*4] = { PC + 4, mask_fail };
+    //   CSP++;
+    //   PC = S1.u64;
+    // end.
+    // Conditional branch using branch-stack.
+    // S0 = compare mask(vcc or any sgpr) and
+    // S1 = 64-bit byte address of target instruction.
+    // See also S_CBRANCH_JOIN.
+    void
+    Inst_SOP2__S_CBRANCH_G_FORK::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+    // --- Inst_SOP2__S_ABSDIFF_I32 class methods ---
+
+    Inst_SOP2__S_ABSDIFF_I32::Inst_SOP2__S_ABSDIFF_I32(InFmt_SOP2 *iFmt)
+        : Inst_SOP2(iFmt, "s_absdiff_i32")
+    {
+        setFlag(ALU);
+    } // Inst_SOP2__S_ABSDIFF_I32
+
+    Inst_SOP2__S_ABSDIFF_I32::~Inst_SOP2__S_ABSDIFF_I32()
+    {
+    } // ~Inst_SOP2__S_ABSDIFF_I32
+
+    // --- description from .arch file ---
+    // D.i = S0.i - S1.i;
+    // if (D.i < 0) then D.i = -D.i;
+    // SCC = 1 if result is non-zero.
+    // Compute the absolute value of difference between two values.
+    void
+    Inst_SOP2__S_ABSDIFF_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
+        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
+        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
+        ScalarOperandU32 scc(gpuDynInst, REG_SCC);
+
+        // Bug fix: the source operands were never read before rawData()
+        // was used, so the computation saw stale register values. Every
+        // other SOP2 ALU instruction in this file reads its sources first.
+        src0.read();
+        src1.read();
+
+        // NOTE(review): src0 - src1 can overflow for operands of opposite
+        // sign near INT32_MIN/MAX (UB in C++) -- TODO confirm whether the
+        // hardware semantics require wrapping here.
+        sdst = std::abs(src0.rawData() - src1.rawData());
+        scc = sdst.rawData() ? 1 : 0;
+
+        sdst.write();
+        scc.write();
+    } // execute
+    // --- Inst_SOP2__S_RFE_RESTORE_B64 class methods ---
+
+    Inst_SOP2__S_RFE_RESTORE_B64::Inst_SOP2__S_RFE_RESTORE_B64(
+          InFmt_SOP2 *iFmt)
+        : Inst_SOP2(iFmt, "s_rfe_restore_b64")
+    {
+    } // Inst_SOP2__S_RFE_RESTORE_B64
+
+    Inst_SOP2__S_RFE_RESTORE_B64::~Inst_SOP2__S_RFE_RESTORE_B64()
+    {
+    } // ~Inst_SOP2__S_RFE_RESTORE_B64
+
+    // --- description from .arch file ---
+    // PRIV = 0;
+    // PC = S0.u64;
+    // INST_ATC = S1.u32[0].
+    // Return from exception handler and continue, possibly changing the
+    // instruction ATC mode.
+    // This instruction may only be used within a trap handler.
+    // Use this instruction when the main program may be in a different memory
+    // space than the trap handler.
+ void + Inst_SOP2__S_RFE_RESTORE_B64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOP2__S_MUL_HI_U32 class methods --- + + Inst_SOP2__S_MUL_HI_U32::Inst_SOP2__S_MUL_HI_U32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_mul_hi_u32") + { + setFlag(ALU); + } // Inst_SOP2__S_MUL_HI_U32 + + Inst_SOP2__S_MUL_HI_U32::~Inst_SOP2__S_MUL_HI_U32() + { + } // ~Inst_SOP2__S_MUL_HI_U32 + + // --- description from .arch file --- + // D.u = (S0.u * S1.u) >> 32; + void + Inst_SOP2__S_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + + src0.read(); + src1.read(); + + VecElemU64 tmp_dst = + ((VecElemU64)src0.rawData() * (VecElemU64)src1.rawData()); + sdst = (tmp_dst >> 32); + + sdst.write(); + } // execute + // --- Inst_SOP2__S_MUL_HI_I32 class methods --- + + Inst_SOP2__S_MUL_HI_I32::Inst_SOP2__S_MUL_HI_I32(InFmt_SOP2 *iFmt) + : Inst_SOP2(iFmt, "s_mul_hi_i32") + { + setFlag(ALU); + } // Inst_SOP2__S_MUL_HI_I32 + + Inst_SOP2__S_MUL_HI_I32::~Inst_SOP2__S_MUL_HI_I32() + { + } // ~Inst_SOP2__S_MUL_HI_I32 + + // --- description from .arch file --- + // D.u = (S0.u * S1.u) >> 32; + void + Inst_SOP2__S_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + src0.read(); + src1.read(); + + VecElemI64 tmp_src0 = + sext::digits>(src0.rawData()); + VecElemI64 tmp_src1 = + sext::digits>(src1.rawData()); + sdst = (VecElemI32)((tmp_src0 * tmp_src1) >> 32); + + sdst.write(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/sopc.cc b/src/arch/amdgpu/vega/insts/sopc.cc new file mode 100644 index 0000000000..9c58688e53 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/sopc.cc @@ 
-0,0 +1,599 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_SOPC__S_CMP_EQ_I32 class methods --- + + Inst_SOPC__S_CMP_EQ_I32::Inst_SOPC__S_CMP_EQ_I32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_eq_i32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_EQ_I32 + + Inst_SOPC__S_CMP_EQ_I32::~Inst_SOPC__S_CMP_EQ_I32() + { + } // ~Inst_SOPC__S_CMP_EQ_I32 + + // --- description from .arch file --- + // SCC = (S0.i == S1.i). + void + Inst_SOPC__S_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() == src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_LG_I32 class methods --- + + Inst_SOPC__S_CMP_LG_I32::Inst_SOPC__S_CMP_LG_I32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_lg_i32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_LG_I32 + + Inst_SOPC__S_CMP_LG_I32::~Inst_SOPC__S_CMP_LG_I32() + { + } // ~Inst_SOPC__S_CMP_LG_I32 + + // --- description from .arch file --- + // SCC = (S0.i != S1.i). + void + Inst_SOPC__S_CMP_LG_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() != src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_GT_I32 class methods --- + + Inst_SOPC__S_CMP_GT_I32::Inst_SOPC__S_CMP_GT_I32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_gt_i32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_GT_I32 + + Inst_SOPC__S_CMP_GT_I32::~Inst_SOPC__S_CMP_GT_I32() + { + } // ~Inst_SOPC__S_CMP_GT_I32 + + // --- description from .arch file --- + // SCC = (S0.i > S1.i). 
+ void + Inst_SOPC__S_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() > src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_GE_I32 class methods --- + + Inst_SOPC__S_CMP_GE_I32::Inst_SOPC__S_CMP_GE_I32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_ge_i32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_GE_I32 + + Inst_SOPC__S_CMP_GE_I32::~Inst_SOPC__S_CMP_GE_I32() + { + } // ~Inst_SOPC__S_CMP_GE_I32 + + // --- description from .arch file --- + // SCC = (S0.i >= S1.i). + void + Inst_SOPC__S_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() >= src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_LT_I32 class methods --- + + Inst_SOPC__S_CMP_LT_I32::Inst_SOPC__S_CMP_LT_I32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_lt_i32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_LT_I32 + + Inst_SOPC__S_CMP_LT_I32::~Inst_SOPC__S_CMP_LT_I32() + { + } // ~Inst_SOPC__S_CMP_LT_I32 + + // --- description from .arch file --- + // SCC = (S0.i < S1.i). + void + Inst_SOPC__S_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() < src1.rawData()) ? 
1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_LE_I32 class methods --- + + Inst_SOPC__S_CMP_LE_I32::Inst_SOPC__S_CMP_LE_I32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_le_i32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_LE_I32 + + Inst_SOPC__S_CMP_LE_I32::~Inst_SOPC__S_CMP_LE_I32() + { + } // ~Inst_SOPC__S_CMP_LE_I32 + + // --- description from .arch file --- + // SCC = (S0.i <= S1.i). + void + Inst_SOPC__S_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() <= src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_EQ_U32 class methods --- + + Inst_SOPC__S_CMP_EQ_U32::Inst_SOPC__S_CMP_EQ_U32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_eq_u32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_EQ_U32 + + Inst_SOPC__S_CMP_EQ_U32::~Inst_SOPC__S_CMP_EQ_U32() + { + } // ~Inst_SOPC__S_CMP_EQ_U32 + + // --- description from .arch file --- + // SCC = (S0.u == S1.u). + void + Inst_SOPC__S_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() == src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_LG_U32 class methods --- + + Inst_SOPC__S_CMP_LG_U32::Inst_SOPC__S_CMP_LG_U32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_lg_u32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_LG_U32 + + Inst_SOPC__S_CMP_LG_U32::~Inst_SOPC__S_CMP_LG_U32() + { + } // ~Inst_SOPC__S_CMP_LG_U32 + + // --- description from .arch file --- + // SCC = (S0.u != S1.u). 
+ void + Inst_SOPC__S_CMP_LG_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() != src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_GT_U32 class methods --- + + Inst_SOPC__S_CMP_GT_U32::Inst_SOPC__S_CMP_GT_U32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_gt_u32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_GT_U32 + + Inst_SOPC__S_CMP_GT_U32::~Inst_SOPC__S_CMP_GT_U32() + { + } // ~Inst_SOPC__S_CMP_GT_U32 + + // --- description from .arch file --- + // SCC = (S0.u > S1.u). + void + Inst_SOPC__S_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() > src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_GE_U32 class methods --- + + Inst_SOPC__S_CMP_GE_U32::Inst_SOPC__S_CMP_GE_U32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_ge_u32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_GE_U32 + + Inst_SOPC__S_CMP_GE_U32::~Inst_SOPC__S_CMP_GE_U32() + { + } // ~Inst_SOPC__S_CMP_GE_U32 + + // --- description from .arch file --- + // SCC = (S0.u >= S1.u). + void + Inst_SOPC__S_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() >= src1.rawData()) ? 
1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_LT_U32 class methods --- + + Inst_SOPC__S_CMP_LT_U32::Inst_SOPC__S_CMP_LT_U32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_lt_u32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_LT_U32 + + Inst_SOPC__S_CMP_LT_U32::~Inst_SOPC__S_CMP_LT_U32() + { + } // ~Inst_SOPC__S_CMP_LT_U32 + + // --- description from .arch file --- + // SCC = (S0.u < S1.u). + void + Inst_SOPC__S_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() < src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_LE_U32 class methods --- + + Inst_SOPC__S_CMP_LE_U32::Inst_SOPC__S_CMP_LE_U32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_le_u32") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_LE_U32 + + Inst_SOPC__S_CMP_LE_U32::~Inst_SOPC__S_CMP_LE_U32() + { + } // ~Inst_SOPC__S_CMP_LE_U32 + + // --- description from .arch file --- + // SCC = (S0.u <= S1.u). + void + Inst_SOPC__S_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() <= src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_BITCMP0_B32 class methods --- + + Inst_SOPC__S_BITCMP0_B32::Inst_SOPC__S_BITCMP0_B32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_bitcmp0_b32") + { + setFlag(ALU); + } // Inst_SOPC__S_BITCMP0_B32 + + Inst_SOPC__S_BITCMP0_B32::~Inst_SOPC__S_BITCMP0_B32() + { + } // ~Inst_SOPC__S_BITCMP0_B32 + + // --- description from .arch file --- + // SCC = (S0.u[S1.u[4:0]] == 0). 
+ void + Inst_SOPC__S_BITCMP0_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = !bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_BITCMP1_B32 class methods --- + + Inst_SOPC__S_BITCMP1_B32::Inst_SOPC__S_BITCMP1_B32(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_bitcmp1_b32") + { + setFlag(ALU); + } // Inst_SOPC__S_BITCMP1_B32 + + Inst_SOPC__S_BITCMP1_B32::~Inst_SOPC__S_BITCMP1_B32() + { + } // ~Inst_SOPC__S_BITCMP1_B32 + + // --- description from .arch file --- + // SCC = (S0.u[S1.u[4:0]] == 1). + void + Inst_SOPC__S_BITCMP1_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_BITCMP0_B64 class methods --- + + Inst_SOPC__S_BITCMP0_B64::Inst_SOPC__S_BITCMP0_B64(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_bitcmp0_b64") + { + setFlag(ALU); + } // Inst_SOPC__S_BITCMP0_B64 + + Inst_SOPC__S_BITCMP0_B64::~Inst_SOPC__S_BITCMP0_B64() + { + } // ~Inst_SOPC__S_BITCMP0_B64 + + // --- description from .arch file --- + // SCC = (S0.u64[S1.u[5:0]] == 0). + void + Inst_SOPC__S_BITCMP0_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = !bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 
1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_BITCMP1_B64 class methods --- + + Inst_SOPC__S_BITCMP1_B64::Inst_SOPC__S_BITCMP1_B64(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_bitcmp1_b64") + { + setFlag(ALU); + } // Inst_SOPC__S_BITCMP1_B64 + + Inst_SOPC__S_BITCMP1_B64::~Inst_SOPC__S_BITCMP1_B64() + { + } // ~Inst_SOPC__S_BITCMP1_B64 + + // --- description from .arch file --- + // SCC = (S0.u64[S1.u[5:0]] == 1). + void + Inst_SOPC__S_BITCMP1_B64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_SETVSKIP class methods --- + + Inst_SOPC__S_SETVSKIP::Inst_SOPC__S_SETVSKIP(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_setvskip") + { + } // Inst_SOPC__S_SETVSKIP + + Inst_SOPC__S_SETVSKIP::~Inst_SOPC__S_SETVSKIP() + { + } // ~Inst_SOPC__S_SETVSKIP + + // --- description from .arch file --- + // VSKIP = S0.u[S1.u[4:0]]. + // Enables and disables VSKIP mode. + // When VSKIP is enabled, no VOP*/M*BUF/MIMG/DS/FLAT/EXP instuctions are + // issued. + // If any vector operations are outstanding, S_WAITCNT must be issued + // before executing. + // This instruction requires one waitstate after executing (e.g. S_NOP 0). + // Example: + // s_waitcnt 0 + // s_setvskip 1, 0 // Enable vskip mode. 
+ // s_nop 1 + void + Inst_SOPC__S_SETVSKIP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPC__S_SET_GPR_IDX_ON class methods --- + + Inst_SOPC__S_SET_GPR_IDX_ON::Inst_SOPC__S_SET_GPR_IDX_ON(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_set_gpr_idx_on") + { + } // Inst_SOPC__S_SET_GPR_IDX_ON + + Inst_SOPC__S_SET_GPR_IDX_ON::~Inst_SOPC__S_SET_GPR_IDX_ON() + { + } // ~Inst_SOPC__S_SET_GPR_IDX_ON + + // --- description from .arch file --- + // MODE.gpr_idx_en = 1; + // M0[7:0] = S0.u[7:0]; + // M0[15:12] = SIMM4 (direct contents of S1 field); + // // Remaining bits of M0 are unmodified. + // Enable GPR indexing mode. Vector operations after this will perform + // relative GPR addressing based on the contents of M0. The structure + // SQ_M0_GPR_IDX_WORD may be used to decode M0. + // The raw contents of the S1 field are read and used to set the enable + // bits. S1[0] = VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and + // S1[3] = VDST_REL. + void + Inst_SOPC__S_SET_GPR_IDX_ON::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPC__S_CMP_EQ_U64 class methods --- + + Inst_SOPC__S_CMP_EQ_U64::Inst_SOPC__S_CMP_EQ_U64(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_eq_u64") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_EQ_U64 + + Inst_SOPC__S_CMP_EQ_U64::~Inst_SOPC__S_CMP_EQ_U64() + { + } // ~Inst_SOPC__S_CMP_EQ_U64 + + // --- description from .arch file --- + // SCC = (S0.i64 == S1.i64). + void + Inst_SOPC__S_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() == src1.rawData()) ? 
1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPC__S_CMP_LG_U64 class methods --- + + Inst_SOPC__S_CMP_LG_U64::Inst_SOPC__S_CMP_LG_U64(InFmt_SOPC *iFmt) + : Inst_SOPC(iFmt, "s_cmp_lg_u64") + { + setFlag(ALU); + } // Inst_SOPC__S_CMP_LG_U64 + + Inst_SOPC__S_CMP_LG_U64::~Inst_SOPC__S_CMP_LG_U64() + { + } // ~Inst_SOPC__S_CMP_LG_U64 + + // --- description from .arch file --- + // SCC = (S0.i64 != S1.i64). + void + Inst_SOPC__S_CMP_LG_U64::execute(GPUDynInstPtr gpuDynInst) + { + ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0); + ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src0.read(); + src1.read(); + + scc = (src0.rawData() != src1.rawData()) ? 1 : 0; + + scc.write(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/sopk.cc b/src/arch/amdgpu/vega/insts/sopk.cc new file mode 100644 index 0000000000..7abbb9abb4 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/sopk.cc @@ -0,0 +1,648 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" +#include "dev/amdgpu/hwreg_defines.hh" +#include "gpu-compute/shader.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_SOPK__S_MOVK_I32 class methods --- + + Inst_SOPK__S_MOVK_I32::Inst_SOPK__S_MOVK_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_movk_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_MOVK_I32 + + Inst_SOPK__S_MOVK_I32::~Inst_SOPK__S_MOVK_I32() + { + } // ~Inst_SOPK__S_MOVK_I32 + + // --- description from .arch file --- + // D.i = signext(SIMM16) (sign extension). + void + Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + + sdst = simm16; + + sdst.write(); + } // execute + // --- Inst_SOPK__S_CMOVK_I32 class methods --- + + Inst_SOPK__S_CMOVK_I32::Inst_SOPK__S_CMOVK_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmovk_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMOVK_I32 + + Inst_SOPK__S_CMOVK_I32::~Inst_SOPK__S_CMOVK_I32() + { + } // ~Inst_SOPK__S_CMOVK_I32 + + // --- description from .arch file --- + // if (SCC) then D.i = signext(SIMM16); + // else NOP. 
+ // Conditional move with sign extension. + void + Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); + ScalarOperandI32 sdst(gpuDynInst, instData.SDST); + ConstScalarOperandU32 scc(gpuDynInst, REG_SCC); + + scc.read(); + + if (scc.rawData()) { + sdst = simm16; + sdst.write(); + } + } // execute + // --- Inst_SOPK__S_CMPK_EQ_I32 class methods --- + + Inst_SOPK__S_CMPK_EQ_I32::Inst_SOPK__S_CMPK_EQ_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_eq_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_EQ_I32 + + Inst_SOPK__S_CMPK_EQ_I32::~Inst_SOPK__S_CMPK_EQ_I32() + { + } // ~Inst_SOPK__S_CMPK_EQ_I32 + + // --- description from .arch file --- + // SCC = (S0.i == signext(SIMM16)). + void + Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); + ConstScalarOperandI32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() == simm16) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_CMPK_LG_I32 class methods --- + + Inst_SOPK__S_CMPK_LG_I32::Inst_SOPK__S_CMPK_LG_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_lg_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_LG_I32 + + Inst_SOPK__S_CMPK_LG_I32::~Inst_SOPK__S_CMPK_LG_I32() + { + } // ~Inst_SOPK__S_CMPK_LG_I32 + + // --- description from .arch file --- + // SCC = (S0.i != signext(SIMM16)). + void + Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); + ConstScalarOperandI32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() != simm16) ? 
1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_CMPK_GT_I32 class methods --- + + Inst_SOPK__S_CMPK_GT_I32::Inst_SOPK__S_CMPK_GT_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_gt_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_GT_I32 + + Inst_SOPK__S_CMPK_GT_I32::~Inst_SOPK__S_CMPK_GT_I32() + { + } // ~Inst_SOPK__S_CMPK_GT_I32 + + // --- description from .arch file --- + // SCC = (S0.i > signext(SIMM16)). + void + Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); + ConstScalarOperandI32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() > simm16) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_CMPK_GE_I32 class methods --- + + Inst_SOPK__S_CMPK_GE_I32::Inst_SOPK__S_CMPK_GE_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_ge_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_GE_I32 + + Inst_SOPK__S_CMPK_GE_I32::~Inst_SOPK__S_CMPK_GE_I32() + { + } // ~Inst_SOPK__S_CMPK_GE_I32 + + // --- description from .arch file --- + // SCC = (S0.i >= signext(SIMM16)). + void + Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16); + ConstScalarOperandI32 src(gpuDynInst, instData.SDST); + ScalarOperandU32 scc(gpuDynInst, REG_SCC); + + src.read(); + + scc = (src.rawData() >= simm16) ? 1 : 0; + + scc.write(); + } // execute + // --- Inst_SOPK__S_CMPK_LT_I32 class methods --- + + Inst_SOPK__S_CMPK_LT_I32::Inst_SOPK__S_CMPK_LT_I32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_cmpk_lt_i32") + { + setFlag(ALU); + } // Inst_SOPK__S_CMPK_LT_I32 + + Inst_SOPK__S_CMPK_LT_I32::~Inst_SOPK__S_CMPK_LT_I32() + { + } // ~Inst_SOPK__S_CMPK_LT_I32 + + // --- description from .arch file --- + // SCC = (S0.i < signext(SIMM16)). 
    void
    Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        // For SOPK compares the SDST field encodes the source register.
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() < simm16) ? 1 : 0;

        scc.write();
    } // execute
    // --- Inst_SOPK__S_CMPK_LE_I32 class methods ---

    Inst_SOPK__S_CMPK_LE_I32::Inst_SOPK__S_CMPK_LE_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_le_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LE_I32

    Inst_SOPK__S_CMPK_LE_I32::~Inst_SOPK__S_CMPK_LE_I32()
    {
    } // ~Inst_SOPK__S_CMPK_LE_I32

    // --- description from .arch file ---
    // SCC = (S0.i <= signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() <= simm16) ? 1 : 0;

        scc.write();
    } // execute
    // --- Inst_SOPK__S_CMPK_EQ_U32 class methods ---

    Inst_SOPK__S_CMPK_EQ_U32::Inst_SOPK__S_CMPK_EQ_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_eq_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_EQ_U32

    Inst_SOPK__S_CMPK_EQ_U32::~Inst_SOPK__S_CMPK_EQ_U32()
    {
    } // ~Inst_SOPK__S_CMPK_EQ_U32

    // --- description from .arch file ---
    // SCC = (S0.u == SIMM16).
    void
    Inst_SOPK__S_CMPK_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unsigned variant: the immediate is zero-extended, not sign-extended.
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() == simm16) ? 1 : 0;

        scc.write();
    } // execute
    // --- Inst_SOPK__S_CMPK_LG_U32 class methods ---

    Inst_SOPK__S_CMPK_LG_U32::Inst_SOPK__S_CMPK_LG_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_lg_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LG_U32

    Inst_SOPK__S_CMPK_LG_U32::~Inst_SOPK__S_CMPK_LG_U32()
    {
    } // ~Inst_SOPK__S_CMPK_LG_U32

    // --- description from .arch file ---
    // SCC = (S0.u != SIMM16).
    void
    Inst_SOPK__S_CMPK_LG_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() != simm16) ? 1 : 0;

        scc.write();
    } // execute
    // --- Inst_SOPK__S_CMPK_GT_U32 class methods ---

    Inst_SOPK__S_CMPK_GT_U32::Inst_SOPK__S_CMPK_GT_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_gt_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_GT_U32

    Inst_SOPK__S_CMPK_GT_U32::~Inst_SOPK__S_CMPK_GT_U32()
    {
    } // ~Inst_SOPK__S_CMPK_GT_U32

    // --- description from .arch file ---
    // SCC = (S0.u > SIMM16).
    void
    Inst_SOPK__S_CMPK_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        // Unsigned comparison: both operands are U32.
        scc = (src.rawData() > simm16) ? 1 : 0;

        scc.write();
    } // execute
    // --- Inst_SOPK__S_CMPK_GE_U32 class methods ---

    Inst_SOPK__S_CMPK_GE_U32::Inst_SOPK__S_CMPK_GE_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_ge_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_GE_U32

    Inst_SOPK__S_CMPK_GE_U32::~Inst_SOPK__S_CMPK_GE_U32()
    {
    } // ~Inst_SOPK__S_CMPK_GE_U32

    // --- description from .arch file ---
    // SCC = (S0.u >= SIMM16).
    void
    Inst_SOPK__S_CMPK_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() >= simm16) ? 1 : 0;

        scc.write();
    } // execute
    // --- Inst_SOPK__S_CMPK_LT_U32 class methods ---

    Inst_SOPK__S_CMPK_LT_U32::Inst_SOPK__S_CMPK_LT_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_lt_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LT_U32

    Inst_SOPK__S_CMPK_LT_U32::~Inst_SOPK__S_CMPK_LT_U32()
    {
    } // ~Inst_SOPK__S_CMPK_LT_U32

    // --- description from .arch file ---
    // SCC = (S0.u < SIMM16).
    void
    Inst_SOPK__S_CMPK_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() < simm16) ? 1 : 0;

        scc.write();
    } // execute
    // --- Inst_SOPK__S_CMPK_LE_U32 class methods ---

    Inst_SOPK__S_CMPK_LE_U32::Inst_SOPK__S_CMPK_LE_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_le_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LE_U32

    Inst_SOPK__S_CMPK_LE_U32::~Inst_SOPK__S_CMPK_LE_U32()
    {
    } // ~Inst_SOPK__S_CMPK_LE_U32

    // --- description from .arch file ---
    // SCC = (S0.u <= SIMM16).
    void
    Inst_SOPK__S_CMPK_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() <= simm16) ? 1 : 0;

        scc.write();
    } // execute
    // --- Inst_SOPK__S_ADDK_I32 class methods ---

    Inst_SOPK__S_ADDK_I32::Inst_SOPK__S_ADDK_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_addk_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_ADDK_I32

    Inst_SOPK__S_ADDK_I32::~Inst_SOPK__S_ADDK_I32()
    {
    } // ~Inst_SOPK__S_ADDK_I32

    // --- description from .arch file ---
    // D.i = D.i + signext(SIMM16);
    // SCC = overflow.
    void
    Inst_SOPK__S_ADDK_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI16 simm16 = instData.SIMM16;
        // SDST is both source and destination; read via a separate const view.
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16);
        // Signed-overflow detection: the addends share a sign bit but the
        // result's sign bit differs.
        scc = (bits(src.rawData(), 31) == bits(simm16, 15)
            && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    // --- Inst_SOPK__S_MULK_I32 class methods ---

    Inst_SOPK__S_MULK_I32::Inst_SOPK__S_MULK_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_mulk_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_MULK_I32

    Inst_SOPK__S_MULK_I32::~Inst_SOPK__S_MULK_I32()
    {
    } // ~Inst_SOPK__S_MULK_I32

    // --- description from .arch file ---
    // D.i = D.i * signext(SIMM16).
    void
    Inst_SOPK__S_MULK_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI16 simm16 = instData.SIMM16;
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        // Unlike S_ADDK_I32, no SCC/overflow result is defined for multiply.
        sdst = src.rawData() * (ScalarRegI32)sext<16>(simm16);

        sdst.write();
    } // execute
    // --- Inst_SOPK__S_CBRANCH_I_FORK class methods ---

    Inst_SOPK__S_CBRANCH_I_FORK::Inst_SOPK__S_CBRANCH_I_FORK(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cbranch_i_fork")
    {
        setFlag(Branch);
    } // Inst_SOPK__S_CBRANCH_I_FORK

    Inst_SOPK__S_CBRANCH_I_FORK::~Inst_SOPK__S_CBRANCH_I_FORK()
    {
    } // ~Inst_SOPK__S_CBRANCH_I_FORK

    // --- description from .arch file ---
    // mask_pass = S0.u64 & EXEC;
    // mask_fail = ~S0.u64 & EXEC;
    // target_addr = PC + signext(SIMM16 * 4) + 4;
    // if (mask_pass == EXEC)
    //     PC = target_addr;
    // elsif (mask_fail == EXEC)
    //     PC += 4;
    // elsif (bitcount(mask_fail) < bitcount(mask_pass))
    //     EXEC = mask_fail;
    //     SGPR[CSP*4] = { target_addr, mask_pass };
    //     CSP++;
    //     PC += 4;
    // else
    //     EXEC = mask_pass;
    //     SGPR[CSP*4] = { PC + 4, mask_fail };
    //     CSP++;
    //     PC = target_addr;
    // end.
    // Conditional branch using branch-stack.
    // S0 = compare mask(vcc or any sgpr), and
    // SIMM16 = signed DWORD branch offset relative to next instruction.
    // See also S_CBRANCH_JOIN.
    void
    Inst_SOPK__S_CBRANCH_I_FORK::execute(GPUDynInstPtr gpuDynInst)
    {
        // Branch-stack forking is not modeled by the simulator.
        panicUnimplemented();
    } // execute
    // --- Inst_SOPK__S_GETREG_B32 class methods ---

    Inst_SOPK__S_GETREG_B32::Inst_SOPK__S_GETREG_B32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_getreg_b32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_GETREG_B32

    Inst_SOPK__S_GETREG_B32::~Inst_SOPK__S_GETREG_B32()
    {
    } // ~Inst_SOPK__S_GETREG_B32

    // --- description from .arch file ---
    // D.u = hardware-reg. Read some or all of a hardware register into the
    // LSBs of D.
+ // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size + // is 1..32. + void + Inst_SOPK__S_GETREG_B32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI16 simm16 = instData.SIMM16; + ScalarRegU32 hwregId = simm16 & 0x3f; + ScalarRegU32 offset = (simm16 >> 6) & 31; + ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; + + ScalarRegU32 hwreg = + gpuDynInst->computeUnit()->shader->getHwReg(hwregId); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + sdst.read(); + + // Store value from hardware to part of the SDST. + ScalarRegU32 mask = (((1U << size) - 1U) << offset); + sdst = (hwreg & mask) >> offset; + sdst.write(); + } // execute + // --- Inst_SOPK__S_SETREG_B32 class methods --- + + Inst_SOPK__S_SETREG_B32::Inst_SOPK__S_SETREG_B32(InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_setreg_b32") + { + setFlag(ALU); + } // Inst_SOPK__S_SETREG_B32 + + Inst_SOPK__S_SETREG_B32::~Inst_SOPK__S_SETREG_B32() + { + } // ~Inst_SOPK__S_SETREG_B32 + + // --- description from .arch file --- + // hardware-reg = S0.u. Write some or all of the LSBs of D into a hardware + // register. + // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size + // is 1..32. + void + Inst_SOPK__S_SETREG_B32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI16 simm16 = instData.SIMM16; + ScalarRegU32 hwregId = simm16 & 0x3f; + ScalarRegU32 offset = (simm16 >> 6) & 31; + ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; + + ScalarRegU32 hwreg = + gpuDynInst->computeUnit()->shader->getHwReg(hwregId); + ScalarOperandU32 sdst(gpuDynInst, instData.SDST); + sdst.read(); + + // Store value from SDST to part of the hardware register. 
+ ScalarRegU32 mask = (((1U << size) - 1U) << offset); + hwreg = ((hwreg & ~mask) | ((sdst.rawData() << offset) & mask)); + gpuDynInst->computeUnit()->shader->setHwReg(hwregId, hwreg); + + // set MODE register to control the behavior of single precision + // floating-point numbers: denormal mode or round mode + if (hwregId==1 && size==2 + && (offset==4 || offset==0)) { + warn_once("Be cautious that s_setreg_b32 has no real effect " + "on FP modes: %s\n", gpuDynInst->disassemble()); + return; + } + + // panic if not changing MODE of floating-point numbers + panicUnimplemented(); + } // execute + // --- Inst_SOPK__S_SETREG_IMM32_B32 class methods --- + + Inst_SOPK__S_SETREG_IMM32_B32::Inst_SOPK__S_SETREG_IMM32_B32( + InFmt_SOPK *iFmt) + : Inst_SOPK(iFmt, "s_setreg_imm32_b32") + { + setFlag(ALU); + } // Inst_SOPK__S_SETREG_IMM32_B32 + + Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32() + { + } // ~Inst_SOPK__S_SETREG_IMM32_B32 + + // --- description from .arch file --- + // Write some or all of the LSBs of IMM32 into a hardware register; this + // --- instruction requires a 32-bit literal constant. + // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size + // is 1..32. + void + Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst) + { + ScalarRegI16 simm16 = instData.SIMM16; + ScalarRegU32 hwregId = simm16 & 0x3f; + ScalarRegU32 offset = (simm16 >> 6) & 31; + ScalarRegU32 size = ((simm16 >> 11) & 31) + 1; + + ScalarRegU32 hwreg = + gpuDynInst->computeUnit()->shader->getHwReg(hwregId); + ScalarRegI32 simm32 = extData.imm_u32; + + // Store value from SIMM32 to part of the hardware register. 
+ ScalarRegU32 mask = (((1U << size) - 1U) << offset); + hwreg = ((hwreg & ~mask) | ((simm32 << offset) & mask)); + gpuDynInst->computeUnit()->shader->setHwReg(hwregId, hwreg); + + // set MODE register to control the behavior of single precision + // floating-point numbers: denormal mode or round mode + if (hwregId==HW_REG_MODE && size==2 + && (offset==4 || offset==0)) { + warn_once("Be cautious that s_setreg_imm32_b32 has no real effect " + "on FP modes: %s\n", gpuDynInst->disassemble()); + return; + } + + // panic if not changing modes of single-precision FPs + panicUnimplemented(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/sopp.cc b/src/arch/amdgpu/vega/insts/sopp.cc new file mode 100644 index 0000000000..781113b204 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/sopp.cc @@ -0,0 +1,922 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/instructions.hh" +#include "debug/GPUSync.hh" +#include "gpu-compute/shader.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_SOPP__S_NOP class methods --- + + Inst_SOPP__S_NOP::Inst_SOPP__S_NOP(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_nop") + { + setFlag(Nop); + } // Inst_SOPP__S_NOP + + Inst_SOPP__S_NOP::~Inst_SOPP__S_NOP() + { + } // ~Inst_SOPP__S_NOP + + // --- description from .arch file --- + // Do nothing. Repeat NOP 1..8 times based on SIMM16[2:0] -- 0 = 1 time, + // 7 = 8 times. + // This instruction may be used to introduce wait states to resolve + // hazards; see the shader programming guide for details. Compare with + // S_SLEEP. + void + Inst_SOPP__S_NOP::execute(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_SOPP__S_ENDPGM class methods --- + + Inst_SOPP__S_ENDPGM::Inst_SOPP__S_ENDPGM(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_endpgm") + { + setFlag(EndOfKernel); + } // Inst_SOPP__S_ENDPGM + + Inst_SOPP__S_ENDPGM::~Inst_SOPP__S_ENDPGM() + { + } // ~Inst_SOPP__S_ENDPGM + + // --- description from .arch file --- + // End of program; terminate wavefront. + // The hardware implicitly executes S_WAITCNT 0 before executing this + // --- instruction. + // See S_ENDPGM_SAVED for the context-switch version of this instruction. 
    void
    Inst_SOPP__S_ENDPGM::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ComputeUnit *cu = gpuDynInst->computeUnit();

        // delete extra instructions fetched for completed work-items
        wf->instructionBuffer.erase(wf->instructionBuffer.begin() + 1,
                                    wf->instructionBuffer.end());

        if (wf->pendingFetch) {
            wf->dropFetch = true;
        }

        wf->computeUnit->fetchStage.fetchUnit(wf->simdId)
            .flushBuf(wf->wfSlotId);
        wf->setStatus(Wavefront::S_STOPPED);

        // Drop this WF's claim on the work-group's LDS allocation; the
        // returned refCount tells us whether other WFs of the WG remain.
        int refCount = wf->computeUnit->getLds()
            .decreaseRefCounter(wf->dispatchId, wf->wgId);

        /**
         * The parent WF of this instruction is exiting, therefore
         * it should not participate in this barrier any longer. This
         * prevents possible deadlock issues if WFs exit early.
         */
        int bar_id = WFBarrier::InvalidID;
        if (wf->hasBarrier()) {
            assert(wf->getStatus() != Wavefront::S_BARRIER);
            bar_id = wf->barrierId();
            assert(bar_id != WFBarrier::InvalidID);
            wf->releaseBarrier();
            cu->decMaxBarrierCnt(bar_id);
            DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Exiting the "
                    "program and decrementing max barrier count for "
                    "barrier Id%d. New max count: %d.\n", cu->cu_id,
                    wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id,
                    cu->maxBarrierCnt(bar_id));
        }

        DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
                wf->computeUnit->cu_id, wf->wgId, refCount);

        wf->computeUnit->registerManager->freeRegisters(wf);
        wf->computeUnit->stats.completedWfs++;
        wf->computeUnit->activeWaves--;

        panic_if(wf->computeUnit->activeWaves < 0, "CU[%d] Active waves less "
                 "than zero\n", wf->computeUnit->cu_id);

        DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
                wf->computeUnit->cu_id, wf->simdId, wf->wfSlotId, wf->wfDynId);

        // Sample the read-after-write distance stats gathered over the
        // wave's lifetime, then reset per-wave bookkeeping.
        for (int i = 0; i < wf->vecReads.size(); i++) {
            if (wf->rawDist.find(i) != wf->rawDist.end()) {
                wf->stats.readsPerWrite.sample(wf->vecReads.at(i));
            }
        }
        wf->vecReads.clear();
        wf->rawDist.clear();
        wf->lastInstExec = 0;

        if (!refCount) {
            /**
             * If all WFs have finished, and hence the WG has finished,
             * then we can free up the barrier belonging to the parent
             * WG, but only if we actually used a barrier (i.e., more
             * than one WF in the WG).
             */
            if (bar_id != WFBarrier::InvalidID) {
                DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves are "
                        "now complete. Releasing barrier Id%d.\n", cu->cu_id,
                        wf->simdId, wf->wfSlotId, wf->wfDynId,
                        wf->barrierId());
                cu->releaseBarrier(bar_id);
            }

            /**
             * Last wavefront of the workgroup has executed return. If the
             * workgroup is not the final one in the kernel, then simply
             * retire it; however, if it is the final one, i.e., indicating
             * the kernel end, then release operation (i.e., GL2 WB) is
             * needed
             */

            //check whether the workgroup is indicating the kernel end, i.e.,
            //the last workgroup in the kernel
            bool kernelEnd =
                wf->computeUnit->shader->dispatcher().isReachingKernelEnd(wf);

            bool relNeeded =
                wf->computeUnit->shader->impl_kern_end_rel;

            //if it is not a kernel end, then retire the workgroup directly
            if (!kernelEnd || !relNeeded) {
                wf->computeUnit->shader->dispatcher().notifyWgCompl(wf);
                wf->setStatus(Wavefront::S_STOPPED);
                wf->computeUnit->stats.completedWGs++;

                return;
            }

            /**
             * if it is a kernel end, inject a memory sync, i.e., GL2 WB, and
             * retire the workgroup after receving response.
             * note that GL0V and GL1 are read only, and they just forward GL2
             * WB request. When forwarding, GL1 send the request to all GL2 in
             * the complex
             */
            setFlag(MemSync);
            setFlag(GlobalSegment);
            // Notify Memory System of Kernel Completion
            // Kernel End = isKernel + isMemSync
            wf->setStatus(Wavefront::S_RETURNING);
            gpuDynInst->simdId = wf->simdId;
            gpuDynInst->wfSlotId = wf->wfSlotId;
            gpuDynInst->wfDynId = wf->wfDynId;

            DPRINTF(GPUExec, "inject global memory fence for CU%d: "
                    "WF[%d][%d][%d]\n", wf->computeUnit->cu_id,
                    wf->simdId, wf->wfSlotId, wf->wfDynId);

            // call shader to prepare the flush operations
            wf->computeUnit->shader->prepareFlush(gpuDynInst);

            wf->computeUnit->stats.completedWGs++;
        } else {
            wf->computeUnit->shader->dispatcher().scheduleDispatch();
        }
    } // execute

    // --- Inst_SOPP__S_BRANCH class methods ---

    Inst_SOPP__S_BRANCH::Inst_SOPP__S_BRANCH(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_branch")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_BRANCH

    Inst_SOPP__S_BRANCH::~Inst_SOPP__S_BRANCH()
    {
    } // ~Inst_SOPP__S_BRANCH

    // --- description from .arch file ---
    // PC = PC + signext(SIMM16 * 4) + 4 (short jump).
    // For a long jump, use S_SETPC.
    void
    Inst_SOPP__S_BRANCH::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = gpuDynInst->pc();
        ScalarRegI16 simm16 = instData.SIMM16;

        // SIMM16 is a signed DWORD offset relative to the next instruction.
        pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;

        wf->pc(pc);
    } // execute
    // --- Inst_SOPP__S_WAKEUP class methods ---

    Inst_SOPP__S_WAKEUP::Inst_SOPP__S_WAKEUP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_wakeup")
    {
    } // Inst_SOPP__S_WAKEUP

    Inst_SOPP__S_WAKEUP::~Inst_SOPP__S_WAKEUP()
    {
    } // ~Inst_SOPP__S_WAKEUP

    // --- description from .arch file ---
    // Allow a wave to 'ping' all the other waves in its threadgroup to force
    // them to wake up immediately from an S_SLEEP instruction. The ping is
    // ignored if the waves are not sleeping.
    // This allows for more efficient polling on a memory location. The waves
    // which are polling can sit in a long S_SLEEP between memory reads, but
    // the wave which writes the value can tell them all to wake up early now
    // that the data is available. This is useful for fBarrier implementations
    // (speedup).
    // This method is also safe from races because if any wave misses the ping,
    // everything still works fine (whoever missed it just completes their
    // normal S_SLEEP).
    void
    Inst_SOPP__S_WAKEUP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_CBRANCH_SCC0 class methods ---

    Inst_SOPP__S_CBRANCH_SCC0::Inst_SOPP__S_CBRANCH_SCC0(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_scc0")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_CBRANCH_SCC0

    Inst_SOPP__S_CBRANCH_SCC0::~Inst_SOPP__S_CBRANCH_SCC0()
    {
    } // ~Inst_SOPP__S_CBRANCH_SCC0

    // --- description from .arch file ---
    // if (SCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_SCC0::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = gpuDynInst->pc();
        ScalarRegI16 simm16 = instData.SIMM16;
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        scc.read();

        // Branch taken only when SCC is clear.
        if (!scc.rawData()) {
            pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
        }

        wf->pc(pc);
    } // execute
    // --- Inst_SOPP__S_CBRANCH_SCC1 class methods ---

    Inst_SOPP__S_CBRANCH_SCC1::Inst_SOPP__S_CBRANCH_SCC1(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_scc1")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_CBRANCH_SCC1

    Inst_SOPP__S_CBRANCH_SCC1::~Inst_SOPP__S_CBRANCH_SCC1()
    {
    } // ~Inst_SOPP__S_CBRANCH_SCC1

    // --- description from .arch file ---
    // if (SCC == 1) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_SCC1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = gpuDynInst->pc();
        ScalarRegI16 simm16 = instData.SIMM16;
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        scc.read();

        if (scc.rawData()) {
            pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
        }

        wf->pc(pc);
    } // execute
    // --- Inst_SOPP__S_CBRANCH_VCCZ class methods ---

    Inst_SOPP__S_CBRANCH_VCCZ::Inst_SOPP__S_CBRANCH_VCCZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_vccz")
    {
        setFlag(Branch);
        setFlag(ReadsVCC);
    } // Inst_SOPP__S_CBRANCH_VCCZ

    Inst_SOPP__S_CBRANCH_VCCZ::~Inst_SOPP__S_CBRANCH_VCCZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_VCCZ

    // --- description from .arch file ---
    // if (VCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_VCCZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        // The full 64-bit VCC pair is tested against zero.
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
        Addr pc = gpuDynInst->pc();
        ScalarRegI16 simm16 = instData.SIMM16;

        vcc.read();

        if (!vcc.rawData()) {
            pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
        }

        wf->pc(pc);
    } // execute
    // --- Inst_SOPP__S_CBRANCH_VCCNZ class methods ---

    Inst_SOPP__S_CBRANCH_VCCNZ::Inst_SOPP__S_CBRANCH_VCCNZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_vccnz")
    {
        setFlag(Branch);
        setFlag(ReadsVCC);
    } // Inst_SOPP__S_CBRANCH_VCCNZ

    Inst_SOPP__S_CBRANCH_VCCNZ::~Inst_SOPP__S_CBRANCH_VCCNZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_VCCNZ

    // --- description from .arch file ---
    // if (VCC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_VCCNZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        vcc.read();

        if (vcc.rawData()) {
            Addr pc = gpuDynInst->pc();
            ScalarRegI16 simm16 = instData.SIMM16;
            pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
            wf->pc(pc);
        }
    } // execute
    // --- Inst_SOPP__S_CBRANCH_EXECZ class methods ---

    Inst_SOPP__S_CBRANCH_EXECZ::Inst_SOPP__S_CBRANCH_EXECZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_execz")
    {
        setFlag(Branch);
        setFlag(ReadsEXEC);
    } // Inst_SOPP__S_CBRANCH_EXECZ

    Inst_SOPP__S_CBRANCH_EXECZ::~Inst_SOPP__S_CBRANCH_EXECZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_EXECZ

    // --- description from .arch file ---
    // if (EXEC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_EXECZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        // Taken when no lane of the exec mask is set.
        if (wf->execMask().none()) {
            Addr pc = gpuDynInst->pc();
            ScalarRegI16 simm16 = instData.SIMM16;
            pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
            wf->pc(pc);
        }
    } // execute
    // --- Inst_SOPP__S_CBRANCH_EXECNZ class methods ---

    Inst_SOPP__S_CBRANCH_EXECNZ::Inst_SOPP__S_CBRANCH_EXECNZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_execnz")
    {
        setFlag(Branch);
        setFlag(ReadsEXEC);
    } // Inst_SOPP__S_CBRANCH_EXECNZ

    Inst_SOPP__S_CBRANCH_EXECNZ::~Inst_SOPP__S_CBRANCH_EXECNZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_EXECNZ

    // --- description from .arch file ---
    // if (EXEC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_EXECNZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        // Taken when at least one lane of the exec mask is set.
        if (wf->execMask().any()) {
            Addr pc = gpuDynInst->pc();
            ScalarRegI16 simm16 = instData.SIMM16;
            pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
            wf->pc(pc);
        }
    } // execute
    // --- Inst_SOPP__S_BARRIER class methods ---

    Inst_SOPP__S_BARRIER::Inst_SOPP__S_BARRIER(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_barrier")
    {
        setFlag(MemBarrier);
    } // Inst_SOPP__S_BARRIER

    Inst_SOPP__S_BARRIER::~Inst_SOPP__S_BARRIER()
    {
    } // ~Inst_SOPP__S_BARRIER

    // --- description from .arch file ---
    // Synchronize waves within a threadgroup.
    // If not all waves of the threadgroup have been created yet, waits for
    // entire group before proceeding.
    // If some waves in the threadgroup have already terminated, this waits on
    // only the surviving waves.
    // Barriers are legal inside trap handlers.
    void
    Inst_SOPP__S_BARRIER::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ComputeUnit *cu = gpuDynInst->computeUnit();

        // A single-wave WG has no barrier allocated; nothing to do then.
        if (wf->hasBarrier()) {
            int bar_id = wf->barrierId();
            assert(wf->getStatus() == Wavefront::S_BARRIER);
            cu->incNumAtBarrier(bar_id);
            DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalling at "
                    "barrier Id%d. %d waves now at barrier, %d waves "
                    "remain.\n", cu->cu_id, wf->simdId, wf->wfSlotId,
                    wf->wfDynId, bar_id, cu->numAtBarrier(bar_id),
                    cu->numYetToReachBarrier(bar_id));
        }
    } // execute
    // --- Inst_SOPP__S_SETKILL class methods ---

    Inst_SOPP__S_SETKILL::Inst_SOPP__S_SETKILL(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_setkill")
    {
    } // Inst_SOPP__S_SETKILL

    Inst_SOPP__S_SETKILL::~Inst_SOPP__S_SETKILL()
    {
    } // ~Inst_SOPP__S_SETKILL

    // --- description from .arch file ---
    // set KILL bit to value of SIMM16[0].
    // Used primarily for debugging kill wave host command behavior.
    void
    Inst_SOPP__S_SETKILL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_WAITCNT class methods ---

    Inst_SOPP__S_WAITCNT::Inst_SOPP__S_WAITCNT(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_waitcnt")
    {
        setFlag(ALU);
        setFlag(Waitcnt);
    } // Inst_SOPP__S_WAITCNT

    Inst_SOPP__S_WAITCNT::~Inst_SOPP__S_WAITCNT()
    {
    } // ~Inst_SOPP__S_WAITCNT

    // --- description from .arch file ---
    // Wait for the counts of outstanding lds, vector-memory and
    // --- export/vmem-write-data to be at or below the specified levels.
    // SIMM16[3:0] = vmcount (vector memory operations),
    // SIMM16[6:4] = export/mem-write-data count,
    // SIMM16[12:8] = LGKM_cnt (scalar-mem/GDS/LDS count).
    void
    Inst_SOPP__S_WAITCNT::execute(GPUDynInstPtr gpuDynInst)
    {
        // Unpack the three wait thresholds from SIMM16 and stall the wave
        // until the outstanding-op counters drop to those levels.
        ScalarRegI32 vm_cnt = 0;
        ScalarRegI32 exp_cnt = 0;
        ScalarRegI32 lgkm_cnt = 0;
        vm_cnt = bits(instData.SIMM16, 3, 0);
        exp_cnt = bits(instData.SIMM16, 6, 4);
        lgkm_cnt = bits(instData.SIMM16, 12, 8);
        gpuDynInst->wavefront()->setStatus(Wavefront::S_WAITCNT);
        gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt);
    } // execute
    // --- Inst_SOPP__S_SETHALT class methods ---

    Inst_SOPP__S_SETHALT::Inst_SOPP__S_SETHALT(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sethalt")
    {
    } // Inst_SOPP__S_SETHALT

    Inst_SOPP__S_SETHALT::~Inst_SOPP__S_SETHALT()
    {
    } // ~Inst_SOPP__S_SETHALT

    // --- description from .arch file ---
    // Set HALT bit to value of SIMM16[0]; 1 = halt, 0 = resume.
    // The halt flag is ignored while PRIV == 1 (inside trap handlers) but the
    // shader will halt immediately after the handler returns if HALT is still
    // set at that time.
    void
    Inst_SOPP__S_SETHALT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_SOPP__S_SLEEP class methods ---

    Inst_SOPP__S_SLEEP::Inst_SOPP__S_SLEEP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sleep")
    {
        setFlag(ALU);
        setFlag(Sleep);
    } // Inst_SOPP__S_SLEEP

    Inst_SOPP__S_SLEEP::~Inst_SOPP__S_SLEEP()
    {
    } // ~Inst_SOPP__S_SLEEP

    // --- description from .arch file ---
    // Cause a wave to sleep for (64 * SIMM16[2:0] + 1..64) clocks.
    // The exact amount of delay is approximate. Compare with S_NOP.
    void
    Inst_SOPP__S_SLEEP::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
        gpuDynInst->wavefront()->setStatus(Wavefront::S_STALLED_SLEEP);
        // sleep duration is specified in multiples of 64 cycles
        gpuDynInst->wavefront()->setSleepTime(64 * simm16);
    } // execute
    // --- Inst_SOPP__S_SETPRIO class methods ---

    Inst_SOPP__S_SETPRIO::Inst_SOPP__S_SETPRIO(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_setprio")
    {
        setFlag(ALU);
    } // Inst_SOPP__S_SETPRIO

    Inst_SOPP__S_SETPRIO::~Inst_SOPP__S_SETPRIO()
    {
    } // ~Inst_SOPP__S_SETPRIO

    // --- description from .arch file ---
    // User settable wave priority is set to SIMM16[1:0]. 0 = lowest,
    // 3 = highest.
    // The overall wave priority is {SPIPrio[1:0] + UserPrio[1:0],
    // WaveAge[3:0]}.
    void
    Inst_SOPP__S_SETPRIO::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU16 simm16 = instData.SIMM16;
        ScalarRegU32 userPrio = simm16 & 0x3;

        // Wave priorities are not modeled; make that visible once per run.
        warn_once("S_SETPRIO ignored -- Requested priority %d\n", userPrio);
    } // execute
    // --- Inst_SOPP__S_SENDMSG class methods ---

    Inst_SOPP__S_SENDMSG::Inst_SOPP__S_SENDMSG(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sendmsg")
    {
    } // Inst_SOPP__S_SENDMSG

    Inst_SOPP__S_SENDMSG::~Inst_SOPP__S_SENDMSG()
    {
    } // ~Inst_SOPP__S_SENDMSG

    // --- description from .arch file ---
    // Send a message upstream to VGT or the interrupt handler.
    // SIMM16[9:0] contains the message type and is documented in the shader
    // --- programming guide.
+ void + Inst_SOPP__S_SENDMSG::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_SENDMSGHALT class methods --- + + Inst_SOPP__S_SENDMSGHALT::Inst_SOPP__S_SENDMSGHALT(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_sendmsghalt") + { + } // Inst_SOPP__S_SENDMSGHALT + + Inst_SOPP__S_SENDMSGHALT::~Inst_SOPP__S_SENDMSGHALT() + { + } // ~Inst_SOPP__S_SENDMSGHALT + + // --- description from .arch file --- + // Send a message and then HALT the wavefront; see S_SENDMSG for details. + void + Inst_SOPP__S_SENDMSGHALT::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_TRAP class methods --- + + Inst_SOPP__S_TRAP::Inst_SOPP__S_TRAP(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_trap") + { + } // Inst_SOPP__S_TRAP + + Inst_SOPP__S_TRAP::~Inst_SOPP__S_TRAP() + { + } // ~Inst_SOPP__S_TRAP + + // --- description from .arch file --- + // TrapID = SIMM16[7:0]; + // Wait for all instructions to complete; + // set {TTMP1, TTMP0} = {3'h0, PCRewind[3:0], HT[0], TrapID[7:0], + // PC[47:0]}; + // PC = TBA (trap base address); + // PRIV = 1. + // Enter the trap handler. This instruction may be generated internally as + // well in response to a host trap (HT = 1) or an exception. + // TrapID 0 is reserved for hardware use and should not be used in a + // shader-generated trap. + void + Inst_SOPP__S_TRAP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_SOPP__S_ICACHE_INV class methods --- + + Inst_SOPP__S_ICACHE_INV::Inst_SOPP__S_ICACHE_INV(InFmt_SOPP *iFmt) + : Inst_SOPP(iFmt, "s_icache_inv") + { + setFlag(MemBarrier); + setFlag(GPUStaticInst::MemSync); + setFlag(MemSync); + } // Inst_SOPP__S_ICACHE_INV + + Inst_SOPP__S_ICACHE_INV::~Inst_SOPP__S_ICACHE_INV() + { + } // ~Inst_SOPP__S_ICACHE_INV + + // --- description from .arch file --- + // Invalidate entire L1 instruction cache. 
+    // You must have 12 separate S_NOP instructions or a jump/branch
+    // instruction after this instruction
+    // to ensure the SQ instruction buffer is purged.
+    void
+    Inst_SOPP__S_ICACHE_INV::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        // nothing to do when no lanes are active; undo the issue-count bump
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
+
+        gpuDynInst->resetEntireStatusVector();
+        gpuDynInst->setStatusVector(0, 1);
+        RequestPtr req = std::make_shared<Request>(0, 0, 0,
+                                                   gpuDynInst->computeUnit()->
+                                                   requestorId(), 0,
+                                                   gpuDynInst->wfDynId);
+        gpuDynInst->setRequestFlags(req);
+        gpuDynInst->computeUnit()->scalarMemoryPipe.
+            injectScalarMemFence(gpuDynInst, false, req);
+    } // execute
+    // --- Inst_SOPP__S_INCPERFLEVEL class methods ---
+
+    Inst_SOPP__S_INCPERFLEVEL::Inst_SOPP__S_INCPERFLEVEL(InFmt_SOPP *iFmt)
+        : Inst_SOPP(iFmt, "s_incperflevel")
+    {
+    } // Inst_SOPP__S_INCPERFLEVEL
+
+    Inst_SOPP__S_INCPERFLEVEL::~Inst_SOPP__S_INCPERFLEVEL()
+    {
+    } // ~Inst_SOPP__S_INCPERFLEVEL
+
+    // --- description from .arch file ---
+    // Increment performance counter specified in SIMM16[3:0] by 1.
+    void
+    Inst_SOPP__S_INCPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+    // --- Inst_SOPP__S_DECPERFLEVEL class methods ---
+
+    Inst_SOPP__S_DECPERFLEVEL::Inst_SOPP__S_DECPERFLEVEL(InFmt_SOPP *iFmt)
+        : Inst_SOPP(iFmt, "s_decperflevel")
+    {
+    } // Inst_SOPP__S_DECPERFLEVEL
+
+    Inst_SOPP__S_DECPERFLEVEL::~Inst_SOPP__S_DECPERFLEVEL()
+    {
+    } // ~Inst_SOPP__S_DECPERFLEVEL
+
+    // --- description from .arch file ---
+    // Decrement performance counter specified in SIMM16[3:0] by 1.
+    void
+    Inst_SOPP__S_DECPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented(); // perf counters are not modeled
+    } // execute
+    // --- Inst_SOPP__S_TTRACEDATA class methods ---
+
+    Inst_SOPP__S_TTRACEDATA::Inst_SOPP__S_TTRACEDATA(InFmt_SOPP *iFmt)
+        : Inst_SOPP(iFmt, "s_ttracedata")
+    {
+    } // Inst_SOPP__S_TTRACEDATA
+
+    Inst_SOPP__S_TTRACEDATA::~Inst_SOPP__S_TTRACEDATA()
+    {
+    } // ~Inst_SOPP__S_TTRACEDATA
+
+    // --- description from .arch file ---
+    // Send M0 as user data to the thread trace stream.
+    void
+    Inst_SOPP__S_TTRACEDATA::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented(); // thread tracing is not modeled
+    } // execute
+    // --- Inst_SOPP__S_CBRANCH_CDBGSYS class methods ---
+
+    Inst_SOPP__S_CBRANCH_CDBGSYS::Inst_SOPP__S_CBRANCH_CDBGSYS(
+        InFmt_SOPP *iFmt)
+        : Inst_SOPP(iFmt, "s_cbranch_cdbgsys")
+    {
+        setFlag(Branch);
+    } // Inst_SOPP__S_CBRANCH_CDBGSYS
+
+    Inst_SOPP__S_CBRANCH_CDBGSYS::~Inst_SOPP__S_CBRANCH_CDBGSYS()
+    {
+    } // ~Inst_SOPP__S_CBRANCH_CDBGSYS
+
+    // --- description from .arch file ---
+    // if (conditional_debug_system != 0) then PC = PC + signext(SIMM16 * 4)
+    // + 4;
+    // else NOP.
+    void
+    Inst_SOPP__S_CBRANCH_CDBGSYS::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented(); // conditional debug state is not modeled
+    } // execute
+    // --- Inst_SOPP__S_CBRANCH_CDBGUSER class methods ---
+
+    Inst_SOPP__S_CBRANCH_CDBGUSER::Inst_SOPP__S_CBRANCH_CDBGUSER(
+        InFmt_SOPP *iFmt)
+        : Inst_SOPP(iFmt, "s_cbranch_cdbguser")
+    {
+        setFlag(Branch);
+    } // Inst_SOPP__S_CBRANCH_CDBGUSER
+
+    Inst_SOPP__S_CBRANCH_CDBGUSER::~Inst_SOPP__S_CBRANCH_CDBGUSER()
+    {
+    } // ~Inst_SOPP__S_CBRANCH_CDBGUSER
+
+    // --- description from .arch file ---
+    // if (conditional_debug_user != 0) then PC = PC + signext(SIMM16 * 4) + 4;
+    // else NOP.
+    void
+    Inst_SOPP__S_CBRANCH_CDBGUSER::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented(); // conditional debug state is not modeled
+    } // execute
+    // --- Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER class methods ---
+
+    Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER(
+        InFmt_SOPP *iFmt)
+        : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_or_user")
+    {
+        setFlag(Branch);
+    } // Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER
+
+    Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::
+    ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER()
+    {
+    } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER
+
+    // --- description from .arch file ---
+    // if (conditional_debug_system || conditional_debug_user) then PC = PC +
+    // signext(SIMM16 * 4) + 4;
+    // else NOP.
+    void
+    Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented(); // conditional debug state is not modeled
+    } // execute
+    // --- Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER class methods ---
+
+    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
+    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(InFmt_SOPP *iFmt)
+        : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_and_user")
+    {
+        setFlag(Branch);
+    } // Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER
+
+    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
+    ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER()
+    {
+    } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER
+
+    // --- description from .arch file ---
+    // if (conditional_debug_system && conditional_debug_user) then PC = PC +
+    // signext(SIMM16 * 4) + 4;
+    // else NOP.
+    void
+    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented(); // conditional debug state is not modeled
+    } // execute
+    // --- Inst_SOPP__S_ENDPGM_SAVED class methods ---
+
+    Inst_SOPP__S_ENDPGM_SAVED::Inst_SOPP__S_ENDPGM_SAVED(InFmt_SOPP *iFmt)
+        : Inst_SOPP(iFmt, "s_endpgm_saved")
+    {
+    } // Inst_SOPP__S_ENDPGM_SAVED
+
+    Inst_SOPP__S_ENDPGM_SAVED::~Inst_SOPP__S_ENDPGM_SAVED()
+    {
+    } // ~Inst_SOPP__S_ENDPGM_SAVED
+
+    // --- description from .arch file ---
+    // End of program; signal that a wave has been saved by the context-switch
+    // trap handler and terminate wavefront.
+    // The hardware implicitly executes S_WAITCNT 0 before executing this
+    // instruction.
+    // Use S_ENDPGM in all cases unless you are executing the context-switch
+    // save handler.
+    void
+    Inst_SOPP__S_ENDPGM_SAVED::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented(); // context-switch save/restore is not modeled
+    } // execute
+    // --- Inst_SOPP__S_SET_GPR_IDX_OFF class methods ---
+
+    Inst_SOPP__S_SET_GPR_IDX_OFF::Inst_SOPP__S_SET_GPR_IDX_OFF(
+        InFmt_SOPP *iFmt)
+        : Inst_SOPP(iFmt, "s_set_gpr_idx_off")
+    {
+    } // Inst_SOPP__S_SET_GPR_IDX_OFF
+
+    Inst_SOPP__S_SET_GPR_IDX_OFF::~Inst_SOPP__S_SET_GPR_IDX_OFF()
+    {
+    } // ~Inst_SOPP__S_SET_GPR_IDX_OFF
+
+    // --- description from .arch file ---
+    // MODE.gpr_idx_en = 0.
+    // Clear GPR indexing mode. Vector operations after this will not perform
+    // relative GPR addressing regardless of the contents of M0. This
+    // instruction does not modify M0.
+    void
+    Inst_SOPP__S_SET_GPR_IDX_OFF::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented(); // GPR indexing mode is not modeled
+    } // execute
+    // --- Inst_SOPP__S_SET_GPR_IDX_MODE class methods ---
+
+    Inst_SOPP__S_SET_GPR_IDX_MODE::Inst_SOPP__S_SET_GPR_IDX_MODE(
+        InFmt_SOPP *iFmt)
+        : Inst_SOPP(iFmt, "s_set_gpr_idx_mode")
+    {
+    } // Inst_SOPP__S_SET_GPR_IDX_MODE
+
+    Inst_SOPP__S_SET_GPR_IDX_MODE::~Inst_SOPP__S_SET_GPR_IDX_MODE()
+    {
+    } // ~Inst_SOPP__S_SET_GPR_IDX_MODE
+
+    // --- description from .arch file ---
+    // M0[15:12] = SIMM4.
+    // Modify the mode used for vector GPR indexing.
+    // The raw contents of the source field are read and used to set the enable
+    // bits. SIMM4[0] = VSRC0_REL, SIMM4[1] = VSRC1_REL, SIMM4[2] = VSRC2_REL
+    // and SIMM4[3] = VDST_REL.
+    void
+    Inst_SOPP__S_SET_GPR_IDX_MODE::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented(); // GPR indexing mode is not modeled
+    } // execute
+} // namespace VegaISA
+} // namespace gem5
diff --git a/src/arch/amdgpu/vega/insts/vinterp.cc b/src/arch/amdgpu/vega/insts/vinterp.cc
new file mode 100644
index 0000000000..784f6f2eb2
--- /dev/null
+++ b/src/arch/amdgpu/vega/insts/vinterp.cc
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/amdgpu/vega/insts/instructions.hh"
+
+namespace gem5
+{
+
+namespace VegaISA
+{
+    // --- Inst_VINTRP__V_INTERP_P1_F32 class methods ---
+
+    Inst_VINTRP__V_INTERP_P1_F32::Inst_VINTRP__V_INTERP_P1_F32(
+        InFmt_VINTRP *iFmt)
+        : Inst_VINTRP(iFmt, "v_interp_p1_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VINTRP__V_INTERP_P1_F32
+
+    Inst_VINTRP__V_INTERP_P1_F32::~Inst_VINTRP__V_INTERP_P1_F32()
+    {
+    } // ~Inst_VINTRP__V_INTERP_P1_F32
+
+    // --- description from .arch file ---
+    // D.f = P10 * S.f + P0; parameter interpolation (SQ translates to
+    // V_MAD_F32 for SP).
+    // CAUTION: when in HALF_LDS mode, D must not be the same GPR as S;
+    // if D == S then data corruption will occur.
+    // NOTE: In textual representations the I/J VGPR is the first source and
+    // the attribute is the second source; however in the VOP3 encoding the
+    // attribute is stored in the src0 field and the VGPR is stored in the
+    // src1 field.
+    void
+    Inst_VINTRP__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented(); // graphics interpolation is not modeled
+    } // execute
+    // --- Inst_VINTRP__V_INTERP_P2_F32 class methods ---
+
+    Inst_VINTRP__V_INTERP_P2_F32::Inst_VINTRP__V_INTERP_P2_F32(
+        InFmt_VINTRP *iFmt)
+        : Inst_VINTRP(iFmt, "v_interp_p2_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VINTRP__V_INTERP_P2_F32
+
+    Inst_VINTRP__V_INTERP_P2_F32::~Inst_VINTRP__V_INTERP_P2_F32()
+    {
+    } // ~Inst_VINTRP__V_INTERP_P2_F32
+
+    // --- description from .arch file ---
+    // D.f = P20 * S.f + D.f; parameter interpolation (SQ translates to
+    // V_MAD_F32 for SP).
+    // NOTE: In textual representations the I/J VGPR is the first source and
+    // the attribute is the second source; however in the VOP3 encoding the
+    // attribute is stored in the src0 field and the VGPR is stored in the
+    // src1 field.
+    void
+    Inst_VINTRP__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented(); // graphics interpolation is not modeled
+    } // execute
+    // --- Inst_VINTRP__V_INTERP_MOV_F32 class methods ---
+
+    Inst_VINTRP__V_INTERP_MOV_F32::Inst_VINTRP__V_INTERP_MOV_F32(
+        InFmt_VINTRP *iFmt)
+        : Inst_VINTRP(iFmt, "v_interp_mov_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VINTRP__V_INTERP_MOV_F32
+
+    Inst_VINTRP__V_INTERP_MOV_F32::~Inst_VINTRP__V_INTERP_MOV_F32()
+    {
+    } // ~Inst_VINTRP__V_INTERP_MOV_F32
+
+    // --- description from .arch file ---
+    // D.f = {P10,P20,P0}[S.u]; parameter load.
+    void
+    Inst_VINTRP__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented(); // graphics interpolation is not modeled
+    } // execute
+} // namespace VegaISA
+} // namespace gem5
diff --git a/src/arch/amdgpu/vega/insts/vop1.cc b/src/arch/amdgpu/vega/insts/vop1.cc
new file mode 100644
index 0000000000..f970923951
--- /dev/null
+++ b/src/arch/amdgpu/vega/insts/vop1.cc
@@ -0,0 +1,2435 @@
+/*
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/amdgpu/vega/insts/inst_util.hh"
+#include "arch/amdgpu/vega/insts/instructions.hh"
+
+namespace gem5
+{
+
+namespace VegaISA
+{
+    // --- Inst_VOP1__V_NOP class methods ---
+
+    Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_nop")
+    {
+        setFlag(Nop);
+        setFlag(ALU);
+    } // Inst_VOP1__V_NOP
+
+    Inst_VOP1__V_NOP::~Inst_VOP1__V_NOP()
+    {
+    } // ~Inst_VOP1__V_NOP
+
+    // --- description from .arch file ---
+    // Do nothing.
+    void
+    Inst_VOP1__V_NOP::execute(GPUDynInstPtr gpuDynInst)
+    {
+    } // execute
+    // --- Inst_VOP1__V_MOV_B32 class methods ---
+
+    Inst_VOP1__V_MOV_B32::Inst_VOP1__V_MOV_B32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_mov_b32")
+    {
+        setFlag(ALU);
+    } // Inst_VOP1__V_MOV_B32
+
+    Inst_VOP1__V_MOV_B32::~Inst_VOP1__V_MOV_B32()
+    {
+    } // ~Inst_VOP1__V_MOV_B32
+
+    // --- description from .arch file ---
+    // D.u = S0.u.
+    // Input and output modifiers not supported; this is an untyped operation.
+    void
+    Inst_VOP1__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        if (isDPPInst()) {
+            VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
+            src_dpp.read();
+
+            DPRINTF(VEGA, "Handling V_MOV_B32 SRC DPP. SRC0: register v[%d], "
+                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
+                    "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
+                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
+                    extData.iFmt_VOP_DPP.DPP_CTRL,
+                    extData.iFmt_VOP_DPP.SRC0_ABS,
+                    extData.iFmt_VOP_DPP.SRC0_NEG,
+                    extData.iFmt_VOP_DPP.SRC1_ABS,
+                    extData.iFmt_VOP_DPP.SRC1_NEG,
+                    extData.iFmt_VOP_DPP.BC,
+                    extData.iFmt_VOP_DPP.BANK_MASK,
+                    extData.iFmt_VOP_DPP.ROW_MASK);
+
+            // NOTE: For VOP1, there is no SRC1, so make sure we're not trying
+            // to negate it or take the absolute value of it
+            assert(!extData.iFmt_VOP_DPP.SRC1_ABS);
+            assert(!extData.iFmt_VOP_DPP.SRC1_NEG);
+            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp);
+
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (wf->execMask(lane)) {
+                    vdst[lane] = src_dpp[lane];
+                }
+            }
+        } else {
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (wf->execMask(lane)) {
+                    vdst[lane] = src[lane];
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_READFIRSTLANE_B32 class methods ---
+
+    Inst_VOP1__V_READFIRSTLANE_B32::Inst_VOP1__V_READFIRSTLANE_B32(
+        InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_readfirstlane_b32")
+    {
+        setFlag(ALU);
+    } // Inst_VOP1__V_READFIRSTLANE_B32
+
+    Inst_VOP1__V_READFIRSTLANE_B32::~Inst_VOP1__V_READFIRSTLANE_B32()
+    {
+    } // ~Inst_VOP1__V_READFIRSTLANE_B32
+
+    // --- description from .arch file ---
+    // Copy one VGPR value to one SGPR. D = SGPR destination, S0 = source data
+    // (VGPR# or M0 for lds direct access), Lane# = FindFirst1fromLSB(exec)
+    // (Lane# = 0 if exec is zero). Ignores exec mask for the access. SQ
+    // translates to V_READLANE_B32.
+    // Input and output modifiers not supported; this is an untyped operation.
+    void
+    Inst_VOP1__V_READFIRSTLANE_B32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ScalarRegI32 src_lane(0);
+        ScalarRegU64 exec_mask = wf->execMask().to_ullong();
+        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
+        ScalarOperandU32 sdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        if (exec_mask) {
+            src_lane = findLsbSet(exec_mask); // first active lane
+        }
+
+        sdst = src[src_lane];
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_I32_F64 class methods ---
+
+    Inst_VOP1__V_CVT_I32_F64::Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_i32_f64")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_CVT_I32_F64
+
+    Inst_VOP1__V_CVT_I32_F64::~Inst_VOP1__V_CVT_I32_F64()
+    {
+    } // ~Inst_VOP1__V_CVT_I32_F64
+
+    // --- description from .arch file ---
+    // D.i = (int)S0.d.
+    // Out-of-range floating point values (including infinity) saturate. NaN is
+    // converted to 0.
+    void
+    Inst_VOP1__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
+        VecOperandI32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                int exp;
+                std::frexp(src[lane],&exp); // |src| = m * 2^exp, 0.5 <= m < 1
+                if (std::isnan(src[lane])) {
+                    vdst[lane] = 0;
+                } else if (std::isinf(src[lane]) || exp > 30) {
+                    if (std::signbit(src[lane])) {
+                        vdst[lane] = INT_MIN; // saturate negative overflow
+                    } else {
+                        vdst[lane] = INT_MAX; // saturate positive overflow
+                    }
+                } else {
+                    vdst[lane] = (VecElemI32)src[lane];
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_F64_I32 class methods ---
+
+    Inst_VOP1__V_CVT_F64_I32::Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_f64_i32")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_CVT_F64_I32
+
+    Inst_VOP1__V_CVT_F64_I32::~Inst_VOP1__V_CVT_F64_I32()
+    {
+    } // ~Inst_VOP1__V_CVT_F64_I32
+
+    // --- description from .arch file ---
+    // D.d = (double)S0.i.
+    void
+    Inst_VOP1__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
+        VecOperandF64 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = (VecElemF64)src[lane]; // exact: all i32 fit in f64
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_F32_I32 class methods ---
+
+    Inst_VOP1__V_CVT_F32_I32::Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_f32_i32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_CVT_F32_I32
+
+    Inst_VOP1__V_CVT_F32_I32::~Inst_VOP1__V_CVT_F32_I32()
+    {
+    } // ~Inst_VOP1__V_CVT_F32_I32
+
+    // --- description from .arch file ---
+    // D.f = (float)S0.i.
+    void
+    Inst_VOP1__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = (VecElemF32)src[lane];
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_F32_U32 class methods ---
+
+    Inst_VOP1__V_CVT_F32_U32::Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_f32_u32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_CVT_F32_U32
+
+    Inst_VOP1__V_CVT_F32_U32::~Inst_VOP1__V_CVT_F32_U32()
+    {
+    } // ~Inst_VOP1__V_CVT_F32_U32
+
+    // --- description from .arch file ---
+    // D.f = (float)S0.u.
+    void
+    Inst_VOP1__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = (VecElemF32)src[lane];
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_U32_F32 class methods ---
+
+    Inst_VOP1__V_CVT_U32_F32::Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_u32_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_CVT_U32_F32
+
+    Inst_VOP1__V_CVT_U32_F32::~Inst_VOP1__V_CVT_U32_F32()
+    {
+    } // ~Inst_VOP1__V_CVT_U32_F32
+
+    // --- description from .arch file ---
+    // D.u = (unsigned)S0.f.
+    // Out-of-range floating point values (including infinity) saturate. NaN is
+    // converted to 0.
+    void
+    Inst_VOP1__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                int exp;
+                std::frexp(src[lane],&exp); // |src| = m * 2^exp, 0.5 <= m < 1
+                if (std::isnan(src[lane])) {
+                    vdst[lane] = 0;
+                } else if (std::isinf(src[lane])) {
+                    if (std::signbit(src[lane])) {
+                        vdst[lane] = 0; // -inf clamps to unsigned minimum
+                    } else {
+                        vdst[lane] = UINT_MAX;
+                    }
+                } else if (exp > 31) {
+                    vdst[lane] = UINT_MAX; // saturate positive overflow
+                } else {
+                    vdst[lane] = (VecElemU32)src[lane];
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_I32_F32 class methods ---
+
+    Inst_VOP1__V_CVT_I32_F32::Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_i32_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_CVT_I32_F32
+
+    Inst_VOP1__V_CVT_I32_F32::~Inst_VOP1__V_CVT_I32_F32()
+    {
+    } // ~Inst_VOP1__V_CVT_I32_F32
+
+    // --- description from .arch file ---
+    // D.i = (int)S0.f.
+    // Out-of-range floating point values (including infinity) saturate. NaN is
+    // converted to 0.
+    void
+    Inst_VOP1__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandI32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                int exp;
+                std::frexp(src[lane],&exp); // |src| = m * 2^exp, 0.5 <= m < 1
+                if (std::isnan(src[lane])) {
+                    vdst[lane] = 0;
+                } else if (std::isinf(src[lane]) || exp > 30) {
+                    if (std::signbit(src[lane])) {
+                        vdst[lane] = INT_MIN; // saturate negative overflow
+                    } else {
+                        vdst[lane] = INT_MAX; // saturate positive overflow
+                    }
+                } else {
+                    vdst[lane] = (VecElemI32)src[lane];
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_MOV_FED_B32 class methods ---
+
+    Inst_VOP1__V_MOV_FED_B32::Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_mov_fed_b32")
+    {
+        setFlag(ALU);
+    } // Inst_VOP1__V_MOV_FED_B32
+
+    Inst_VOP1__V_MOV_FED_B32::~Inst_VOP1__V_MOV_FED_B32()
+    {
+    } // ~Inst_VOP1__V_MOV_FED_B32
+
+    // --- description from .arch file ---
+    // D.u = S0.u;
+    // Introduce EDC double error upon write to dest vgpr without causing an
+    // exception.
+    // Input and output modifiers not supported; this is an untyped operation.
+    void
+    Inst_VOP1__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented(); // EDC error injection is not modeled
+    } // execute
+    // --- Inst_VOP1__V_CVT_F16_F32 class methods ---
+
+    Inst_VOP1__V_CVT_F16_F32::Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_f16_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_CVT_F16_F32
+
+    Inst_VOP1__V_CVT_F16_F32::~Inst_VOP1__V_CVT_F16_F32()
+    {
+    } // ~Inst_VOP1__V_CVT_F16_F32
+
+    // --- description from .arch file ---
+    // D.f16 = flt32_to_flt16(S0.f).
+    // Supports input modifiers and creates FP16 denormals when appropriate.
+    void
+    Inst_VOP1__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                float tmp = src[lane];
+                AMDGPU::mxfloat16 out(tmp);
+                // keep the f16 bit pattern in the low 16 bits of the VGPR
+                vdst[lane] = (out.data >> 16);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_F32_F16 class methods ---
+
+    Inst_VOP1__V_CVT_F32_F16::Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_f32_f16")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_CVT_F32_F16
+
+    Inst_VOP1__V_CVT_F32_F16::~Inst_VOP1__V_CVT_F32_F16()
+    {
+    } // ~Inst_VOP1__V_CVT_F32_F16
+
+    // --- description from .arch file ---
+    // D.f = flt16_to_flt32(S0.f16).
+    // FP16 denormal inputs are always accepted.
+    void
+    Inst_VOP1__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                AMDGPU::mxfloat16 tmp(src[lane]);
+                vdst[lane] = float(tmp);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_RPI_I32_F32 class methods ---
+
+    Inst_VOP1__V_CVT_RPI_I32_F32::Inst_VOP1__V_CVT_RPI_I32_F32(
+        InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_rpi_i32_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_CVT_RPI_I32_F32
+
+    Inst_VOP1__V_CVT_RPI_I32_F32::~Inst_VOP1__V_CVT_RPI_I32_F32()
+    {
+    } // ~Inst_VOP1__V_CVT_RPI_I32_F32
+
+    // --- description from .arch file ---
+    // D.i = (int)floor(S0.f + 0.5).
+    void
+    Inst_VOP1__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandI32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_FLR_I32_F32 class methods ---
+
+    Inst_VOP1__V_CVT_FLR_I32_F32::Inst_VOP1__V_CVT_FLR_I32_F32(
+        InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_flr_i32_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_CVT_FLR_I32_F32
+
+    Inst_VOP1__V_CVT_FLR_I32_F32::~Inst_VOP1__V_CVT_FLR_I32_F32()
+    {
+    } // ~Inst_VOP1__V_CVT_FLR_I32_F32
+
+    // --- description from .arch file ---
+    // D.i = (int)floor(S0.f).
+    void
+    Inst_VOP1__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandI32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = (VecElemI32)std::floor(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_OFF_F32_I4 class methods ---
+
+    Inst_VOP1__V_CVT_OFF_F32_I4::Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_off_f32_i4")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_CVT_OFF_F32_I4
+
+    Inst_VOP1__V_CVT_OFF_F32_I4::~Inst_VOP1__V_CVT_OFF_F32_I4()
+    {
+    } // ~Inst_VOP1__V_CVT_OFF_F32_I4
+
+    // --- description from .arch file ---
+    // 4-bit signed int to 32-bit float. Used for interpolation in shader.
+    void
+    Inst_VOP1__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
+    {
+        // Could not parse sq_uc.arch desc field
+        panicUnimplemented();
+    } // execute
+    // --- Inst_VOP1__V_CVT_F32_F64 class methods ---
+
+    Inst_VOP1__V_CVT_F32_F64::Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_f32_f64")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_CVT_F32_F64
+
+    Inst_VOP1__V_CVT_F32_F64::~Inst_VOP1__V_CVT_F32_F64()
+    {
+    } // ~Inst_VOP1__V_CVT_F32_F64
+
+    // --- description from .arch file ---
+    // D.f = (float)S0.d.
+    void
+    Inst_VOP1__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = (VecElemF32)src[lane]; // narrowing f64 -> f32
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_F64_F32 class methods ---
+
+    Inst_VOP1__V_CVT_F64_F32::Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_f64_f32")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_CVT_F64_F32
+
+    Inst_VOP1__V_CVT_F64_F32::~Inst_VOP1__V_CVT_F64_F32()
+    {
+    } // ~Inst_VOP1__V_CVT_F64_F32
+
+    // --- description from .arch file ---
+    // D.d = (double)S0.f.
+    void
+    Inst_VOP1__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandF64 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = (VecElemF64)src[lane]; // widening f32 -> f64, exact
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_F32_UBYTE0 class methods ---
+
+    Inst_VOP1__V_CVT_F32_UBYTE0::Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte0")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_CVT_F32_UBYTE0
+
+    Inst_VOP1__V_CVT_F32_UBYTE0::~Inst_VOP1__V_CVT_F32_UBYTE0()
+    {
+    } // ~Inst_VOP1__V_CVT_F32_UBYTE0
+
+    // --- description from .arch file ---
+    // D.f = (float)(S0.u[7:0]).
+    void
+    Inst_VOP1__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0)); // byte 0
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_F32_UBYTE1 class methods ---
+
+    Inst_VOP1__V_CVT_F32_UBYTE1::Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte1")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_CVT_F32_UBYTE1
+
+    Inst_VOP1__V_CVT_F32_UBYTE1::~Inst_VOP1__V_CVT_F32_UBYTE1()
+    {
+    } // ~Inst_VOP1__V_CVT_F32_UBYTE1
+
+    // --- description from .arch file ---
+    // D.f = (float)(S0.u[15:8]).
+    void
+    Inst_VOP1__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8)); // byte 1
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_F32_UBYTE2 class methods ---
+
+    Inst_VOP1__V_CVT_F32_UBYTE2::Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte2")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_CVT_F32_UBYTE2
+
+    Inst_VOP1__V_CVT_F32_UBYTE2::~Inst_VOP1__V_CVT_F32_UBYTE2()
+    {
+    } // ~Inst_VOP1__V_CVT_F32_UBYTE2
+
+    // --- description from .arch file ---
+    // D.f = (float)(S0.u[23:16]).
+    void
+    Inst_VOP1__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16)); // byte 2
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_F32_UBYTE3 class methods ---
+
+    Inst_VOP1__V_CVT_F32_UBYTE3::Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte3")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_CVT_F32_UBYTE3
+
+    Inst_VOP1__V_CVT_F32_UBYTE3::~Inst_VOP1__V_CVT_F32_UBYTE3()
+    {
+    } // ~Inst_VOP1__V_CVT_F32_UBYTE3
+
+    // --- description from .arch file ---
+    // D.f = (float)(S0.u[31:24]).
+    void
+    Inst_VOP1__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // Extract byte 3 (bits 31:24) and convert it to f32.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24));
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_U32_F64 class methods ---
+
+    Inst_VOP1__V_CVT_U32_F64::Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_u32_f64")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_CVT_U32_F64
+
+    Inst_VOP1__V_CVT_U32_F64::~Inst_VOP1__V_CVT_U32_F64()
+    {
+    } // ~Inst_VOP1__V_CVT_U32_F64
+
+    // --- description from .arch file ---
+    // D.u = (unsigned)S0.d.
+    // Out-of-range floating point values (including infinity) saturate. NaN is
+    // --- converted to 0.
+    void
+    Inst_VOP1__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                if (std::isnan(src[lane])) {
+                    // NaN converts to 0 per the description above.
+                    vdst[lane] = 0;
+                } else if (std::isinf(src[lane])) {
+                    // Infinities saturate toward the corresponding bound.
+                    vdst[lane] = std::signbit(src[lane]) ? 0 : UINT_MAX;
+                } else if (std::signbit(src[lane])) {
+                    // Negative finite values are below the u32 range and
+                    // saturate to 0. (Casting a negative double to unsigned
+                    // is undefined behavior, so do not fall through to the
+                    // cast.)
+                    vdst[lane] = 0;
+                } else {
+                    // frexp yields src = m * 2^exp with m in [0.5, 1), so
+                    // any value < 2^32 (i.e., exp <= 32) fits in a u32;
+                    // exp > 32 means src >= 2^32 and must saturate.
+                    int exp = 0;
+                    std::frexp(src[lane], &exp);
+                    vdst[lane] = (exp > 32) ? UINT_MAX
+                                            : (VecElemU32)src[lane];
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_F64_U32 class methods ---
+
+    Inst_VOP1__V_CVT_F64_U32::Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_f64_u32")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_CVT_F64_U32
+
+    Inst_VOP1__V_CVT_F64_U32::~Inst_VOP1__V_CVT_F64_U32()
+    {
+    } // ~Inst_VOP1__V_CVT_F64_U32
+
+    // --- description from .arch file ---
+    // D.d = (double)S0.u.
+    void
+    Inst_VOP1__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
+        VecOperandF64 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // Every u32 value is exactly representable in f64; plain cast.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = (VecElemF64)src[lane];
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_TRUNC_F64 class methods ---
+
+    Inst_VOP1__V_TRUNC_F64::Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_trunc_f64")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_TRUNC_F64
+
+    Inst_VOP1__V_TRUNC_F64::~Inst_VOP1__V_TRUNC_F64()
+    {
+    } // ~Inst_VOP1__V_TRUNC_F64
+
+    // --- description from .arch file ---
+    // D.d = trunc(S0.d), return integer part of S0.d.
+    void
+    Inst_VOP1__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
+        VecOperandF64 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // Round toward zero (drop the fractional part).
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = std::trunc(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CEIL_F64 class methods ---
+
+    Inst_VOP1__V_CEIL_F64::Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_ceil_f64")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_CEIL_F64
+
+    Inst_VOP1__V_CEIL_F64::~Inst_VOP1__V_CEIL_F64()
+    {
+    } // ~Inst_VOP1__V_CEIL_F64
+
+    // --- description from .arch file ---
+    // D.d = trunc(S0.d);
+    // if (S0.d > 0.0 && S0.d != D.d) then D.d += 1.0.
+    void
+    Inst_VOP1__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
+        VecOperandF64 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // std::ceil implements the trunc-then-bump-positive recipe above.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = std::ceil(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_RNDNE_F64 class methods ---
+
+    Inst_VOP1__V_RNDNE_F64::Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_rndne_f64")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_RNDNE_F64
+
+    Inst_VOP1__V_RNDNE_F64::~Inst_VOP1__V_RNDNE_F64()
+    {
+    } // ~Inst_VOP1__V_RNDNE_F64
+
+    // --- description from .arch file ---
+    // D.d = round_nearest_even(S0.d).
+    void
+    Inst_VOP1__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
+        VecOperandF64 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // roundNearestEven is a shared ISA helper (ties go to even).
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = roundNearestEven(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_FLOOR_F64 class methods ---
+
+    Inst_VOP1__V_FLOOR_F64::Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_floor_f64")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_FLOOR_F64
+
+    Inst_VOP1__V_FLOOR_F64::~Inst_VOP1__V_FLOOR_F64()
+    {
+    } // ~Inst_VOP1__V_FLOOR_F64
+
+    // --- description from .arch file ---
+    // D.d = trunc(S0.d);
+    // if (S0.d < 0.0 && S0.d != D.d) then D.d += -1.0.
+    void
+    Inst_VOP1__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
+        VecOperandF64 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // std::floor implements the trunc-then-bump-negative recipe above.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = std::floor(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_FRACT_F32 class methods ---
+
+    Inst_VOP1__V_FRACT_F32::Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_fract_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_FRACT_F32
+
+    Inst_VOP1__V_FRACT_F32::~Inst_VOP1__V_FRACT_F32()
+    {
+    } // ~Inst_VOP1__V_FRACT_F32
+
+    // --- description from .arch file ---
+    // D.f = S0.f - floor(S0.f).
+    void
+    Inst_VOP1__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // Compute S0 - floor(S0) directly. Note std::modf would be wrong
+        // here: its fractional part carries the sign of the input, so for
+        // example fract(-1.25) must be 0.75 (per the description above)
+        // but modf would yield -0.25.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = src[lane] - std::floor(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_TRUNC_F32 class methods ---
+
+    Inst_VOP1__V_TRUNC_F32::Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_trunc_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_TRUNC_F32
+
+    Inst_VOP1__V_TRUNC_F32::~Inst_VOP1__V_TRUNC_F32()
+    {
+    } // ~Inst_VOP1__V_TRUNC_F32
+
+    // --- description from .arch file ---
+    // D.f = trunc(S0.f), return integer part of S0.f.
+    void
+    Inst_VOP1__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        // Note: removed the stray space before the parenthesis to match
+        // the declaration style used everywhere else in this file.
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // Round toward zero (drop the fractional part).
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = std::trunc(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CEIL_F32 class methods ---
+
+    Inst_VOP1__V_CEIL_F32::Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_ceil_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_CEIL_F32
+
+    Inst_VOP1__V_CEIL_F32::~Inst_VOP1__V_CEIL_F32()
+    {
+    } // ~Inst_VOP1__V_CEIL_F32
+
+    // --- description from .arch file ---
+    // D.f = trunc(S0.f);
+    // if (S0.f > 0.0 && S0.f != D.f) then D.f += 1.0.
+    void
+    Inst_VOP1__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // std::ceil implements the trunc-then-bump-positive recipe above.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = std::ceil(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_RNDNE_F32 class methods ---
+
+    Inst_VOP1__V_RNDNE_F32::Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_rndne_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_RNDNE_F32
+
+    Inst_VOP1__V_RNDNE_F32::~Inst_VOP1__V_RNDNE_F32()
+    {
+    } // ~Inst_VOP1__V_RNDNE_F32
+
+    // --- description from .arch file ---
+    // D.f = round_nearest_even(S0.f).
+    void
+    Inst_VOP1__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // roundNearestEven is a shared ISA helper (ties go to even).
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = roundNearestEven(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_FLOOR_F32 class methods ---
+
+    Inst_VOP1__V_FLOOR_F32::Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_floor_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_FLOOR_F32
+
+    Inst_VOP1__V_FLOOR_F32::~Inst_VOP1__V_FLOOR_F32()
+    {
+    } // ~Inst_VOP1__V_FLOOR_F32
+
+    // --- description from .arch file ---
+    // D.f = trunc(S0.f);
+    // if (S0.f < 0.0 && S0.f != D.f) then D.f += -1.0.
+    void
+    Inst_VOP1__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // std::floor implements the trunc-then-bump-negative recipe above.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = std::floor(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_EXP_F32 class methods ---
+
+    Inst_VOP1__V_EXP_F32::Inst_VOP1__V_EXP_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_exp_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_EXP_F32
+
+    Inst_VOP1__V_EXP_F32::~Inst_VOP1__V_EXP_F32()
+    {
+    } // ~Inst_VOP1__V_EXP_F32
+
+    // --- description from .arch file ---
+    // D.f = pow(2.0, S0.f).
+    void
+    Inst_VOP1__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // Base-2 exponential; computed in double then narrowed to f32 on
+        // assignment.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = std::pow(2.0, src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_LOG_F32 class methods ---
+
+    Inst_VOP1__V_LOG_F32::Inst_VOP1__V_LOG_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_log_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_LOG_F32
+
+    Inst_VOP1__V_LOG_F32::~Inst_VOP1__V_LOG_F32()
+    {
+    } // ~Inst_VOP1__V_LOG_F32
+
+    // --- description from .arch file ---
+    // D.f = log2(S0.f). Base 2 logarithm.
+    void
+    Inst_VOP1__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = std::log2(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_RCP_F32 class methods ---
+
+    Inst_VOP1__V_RCP_F32::Inst_VOP1__V_RCP_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_rcp_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_RCP_F32
+
+    Inst_VOP1__V_RCP_F32::~Inst_VOP1__V_RCP_F32()
+    {
+    } // ~Inst_VOP1__V_RCP_F32
+
+    // --- description from .arch file ---
+    // D.f = 1.0 / S0.f. Reciprocal with IEEE rules and < 1ulp error.
+    void
+    Inst_VOP1__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // 1.0 / x is evaluated in double and narrowed to f32 on
+        // assignment; IEEE rules give +/-inf for +/-0 inputs.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = 1.0 / src[lane];
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_RCP_IFLAG_F32 class methods ---
+
+    Inst_VOP1__V_RCP_IFLAG_F32::Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_rcp_iflag_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_RCP_IFLAG_F32
+
+    Inst_VOP1__V_RCP_IFLAG_F32::~Inst_VOP1__V_RCP_IFLAG_F32()
+    {
+    } // ~Inst_VOP1__V_RCP_IFLAG_F32
+
+    // --- description from .arch file ---
+    // D.f = 1.0 / S0.f. Reciprocal intended for integer division, can raise
+    // --- integer DIV_BY_ZERO exception but cannot raise floating-point
+    // --- exceptions.
+    void
+    Inst_VOP1__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // Same datapath as V_RCP_F32 here; the integer DIV_BY_ZERO
+        // exception described above is not modeled.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = 1.0 / src[lane];
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_RSQ_F32 class methods ---
+
+    Inst_VOP1__V_RSQ_F32::Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_rsq_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_RSQ_F32
+
+    Inst_VOP1__V_RSQ_F32::~Inst_VOP1__V_RSQ_F32()
+    {
+    } // ~Inst_VOP1__V_RSQ_F32
+
+    // --- description from .arch file ---
+    // D.f = 1.0 / sqrt(S0.f). Reciprocal square root with IEEE rules.
+    void
+    Inst_VOP1__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = 1.0 / std::sqrt(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_RCP_F64 class methods ---
+
+    Inst_VOP1__V_RCP_F64::Inst_VOP1__V_RCP_F64(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_rcp_f64")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_RCP_F64
+
+    Inst_VOP1__V_RCP_F64::~Inst_VOP1__V_RCP_F64()
+    {
+    } // ~Inst_VOP1__V_RCP_F64
+
+    // --- description from .arch file ---
+    // D.d = 1.0 / S0.d.
+    void
+    Inst_VOP1__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
+        VecOperandF64 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // Special cases handled explicitly:
+        //   +/-0   -> +inf (note: sign of zero is not preserved here)
+        //   NaN    -> NaN
+        //   +/-inf -> +/-0
+        //   else   -> 1.0 / x
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                if (std::fpclassify(src[lane]) == FP_ZERO) {
+                    vdst[lane] = +INFINITY;
+                } else if (std::isnan(src[lane])) {
+                    vdst[lane] = NAN;
+                } else if (std::isinf(src[lane])) {
+                    if (std::signbit(src[lane])) {
+                        vdst[lane] = -0.0;
+                    } else {
+                        vdst[lane] = 0.0;
+                    }
+                } else {
+                    vdst[lane] = 1.0 / src[lane];
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_RSQ_F64 class methods ---
+
+    Inst_VOP1__V_RSQ_F64::Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_rsq_f64")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_RSQ_F64
+
+    Inst_VOP1__V_RSQ_F64::~Inst_VOP1__V_RSQ_F64()
+    {
+    } // ~Inst_VOP1__V_RSQ_F64
+
+    // --- description from .arch file ---
+    // D.d = 1.0 / sqrt(S0.d). See V_RSQ_F32.
+    void
+    Inst_VOP1__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
+        VecOperandF64 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // Special cases handled explicitly:
+        //   +/-0          -> +inf
+        //   NaN           -> NaN
+        //   +inf          -> 0
+        //   negative      -> NaN (sqrt of a negative is undefined)
+        //   else          -> 1 / sqrt(x)
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                if (std::fpclassify(src[lane]) == FP_ZERO) {
+                    vdst[lane] = +INFINITY;
+                } else if (std::isnan(src[lane])) {
+                    vdst[lane] = NAN;
+                } else if (std::isinf(src[lane])
+                           && !std::signbit(src[lane])) {
+                    vdst[lane] = 0.0;
+                } else if (std::signbit(src[lane])) {
+                    vdst[lane] = NAN;
+                } else {
+                    vdst[lane] = 1.0 / std::sqrt(src[lane]);
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_SQRT_F32 class methods ---
+
+    Inst_VOP1__V_SQRT_F32::Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_sqrt_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_SQRT_F32
+
+    Inst_VOP1__V_SQRT_F32::~Inst_VOP1__V_SQRT_F32()
+    {
+    } // ~Inst_VOP1__V_SQRT_F32
+
+    // --- description from .arch file ---
+    // D.f = sqrt(S0.f).
+    void
+    Inst_VOP1__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = std::sqrt(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_SQRT_F64 class methods ---
+
+    Inst_VOP1__V_SQRT_F64::Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_sqrt_f64")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_SQRT_F64
+
+    Inst_VOP1__V_SQRT_F64::~Inst_VOP1__V_SQRT_F64()
+    {
+    } // ~Inst_VOP1__V_SQRT_F64
+
+    // --- description from .arch file ---
+    // D.d = sqrt(S0.d).
+    void
+    Inst_VOP1__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
+        VecOperandF64 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = std::sqrt(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_SIN_F32 class methods ---
+
+    Inst_VOP1__V_SIN_F32::Inst_VOP1__V_SIN_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_sin_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_SIN_F32
+
+    Inst_VOP1__V_SIN_F32::~Inst_VOP1__V_SIN_F32()
+    {
+    } // ~Inst_VOP1__V_SIN_F32
+
+    // --- description from .arch file ---
+    // D.f = sin(S0.f * 2 * PI).
+    // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
+    // float 0.0.
+    void
+    Inst_VOP1__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        // REG_PI is a hardware-provided inline constant register.
+        ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+        pi.read();
+
+        // The input is in revolutions (scaled by 2*PI); out-of-range
+        // inputs produce 0.0 per the description above.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                if (src[lane] < -256.0 || src[lane] > 256.0) {
+                    vdst[lane] = 0.0;
+                } else {
+                    vdst[lane] = std::sin(src[lane] * 2.0 * pi.rawData());
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_COS_F32 class methods ---
+
+    Inst_VOP1__V_COS_F32::Inst_VOP1__V_COS_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cos_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_COS_F32
+
+    Inst_VOP1__V_COS_F32::~Inst_VOP1__V_COS_F32()
+    {
+    } // ~Inst_VOP1__V_COS_F32
+
+    // --- description from .arch file ---
+    // D.f = cos(S0.f * 2 * PI).
+    // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in
+    // float 1.0.
+    void
+    Inst_VOP1__V_COS_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        // REG_PI is a hardware-provided inline constant register.
+        ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+        pi.read();
+
+        // The input is in revolutions (scaled by 2*PI). Per the ISA
+        // description above, out-of-range inputs produce 1.0 for cosine
+        // (unlike V_SIN_F32, which produces 0.0).
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                if (src[lane] < -256.0 || src[lane] > 256.0) {
+                    vdst[lane] = 1.0;
+                } else {
+                    vdst[lane] = std::cos(src[lane] * 2.0 * pi.rawData());
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_NOT_B32 class methods ---
+
+    Inst_VOP1__V_NOT_B32::Inst_VOP1__V_NOT_B32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_not_b32")
+    {
+        setFlag(ALU);
+    } // Inst_VOP1__V_NOT_B32
+
+    Inst_VOP1__V_NOT_B32::~Inst_VOP1__V_NOT_B32()
+    {
+    } // ~Inst_VOP1__V_NOT_B32
+
+    // --- description from .arch file ---
+    // D.u = ~S0.u.
+    // Input and output modifiers not supported.
+    void
+    Inst_VOP1__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // Bitwise complement per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = ~src[lane];
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_BFREV_B32 class methods ---
+
+    Inst_VOP1__V_BFREV_B32::Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_bfrev_b32")
+    {
+        setFlag(ALU);
+    } // Inst_VOP1__V_BFREV_B32
+
+    Inst_VOP1__V_BFREV_B32::~Inst_VOP1__V_BFREV_B32()
+    {
+    } // ~Inst_VOP1__V_BFREV_B32
+
+    // --- description from .arch file ---
+    // D.u[31:0] = S0.u[0:31], bitfield reverse.
+    // Input and output modifiers not supported.
+    void
+    Inst_VOP1__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // reverseBits is a shared ISA helper (mirrors all 32 bits).
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = reverseBits(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_FFBH_U32 class methods ---
+
+    Inst_VOP1__V_FFBH_U32::Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_ffbh_u32")
+    {
+        setFlag(ALU);
+    } // Inst_VOP1__V_FFBH_U32
+
+    Inst_VOP1__V_FFBH_U32::~Inst_VOP1__V_FFBH_U32()
+    {
+    } // ~Inst_VOP1__V_FFBH_U32
+
+    // --- description from .arch file ---
+    // D.u = position of first 1 in S0.u from MSB;
+    // D.u = 0xffffffff if S0.u == 0.
+    void
+    Inst_VOP1__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // findFirstOneMsb is a shared ISA helper implementing the
+        // contract described above (including the all-ones zero case).
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = findFirstOneMsb(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_FFBL_B32 class methods ---
+
+    Inst_VOP1__V_FFBL_B32::Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_ffbl_b32")
+    {
+        setFlag(ALU);
+    } // Inst_VOP1__V_FFBL_B32
+
+    Inst_VOP1__V_FFBL_B32::~Inst_VOP1__V_FFBL_B32()
+    {
+    } // ~Inst_VOP1__V_FFBL_B32
+
+    // --- description from .arch file ---
+    // D.u = position of first 1 in S0.u from LSB;
+    // D.u = 0xffffffff if S0.u == 0.
+    void
+    Inst_VOP1__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // findFirstOne is a shared ISA helper (LSB-first scan).
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = findFirstOne(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_FFBH_I32 class methods ---
+
+    Inst_VOP1__V_FFBH_I32::Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_ffbh_i32")
+    {
+        setFlag(ALU);
+    } // Inst_VOP1__V_FFBH_I32
+
+    Inst_VOP1__V_FFBH_I32::~Inst_VOP1__V_FFBH_I32()
+    {
+    } // ~Inst_VOP1__V_FFBH_I32
+
+    // --- description from .arch file ---
+    // D.u = position of first bit different from sign bit in S0.i from MSB;
+    // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
+    void
+    Inst_VOP1__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // firstOppositeSignBit is a shared ISA helper implementing the
+        // contract described above.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = firstOppositeSignBit(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_FREXP_EXP_I32_F64 class methods ---
+
+    Inst_VOP1__V_FREXP_EXP_I32_F64::Inst_VOP1__V_FREXP_EXP_I32_F64(
+        InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_frexp_exp_i32_f64")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_FREXP_EXP_I32_F64
+
+    Inst_VOP1__V_FREXP_EXP_I32_F64::~Inst_VOP1__V_FREXP_EXP_I32_F64()
+    {
+    } // ~Inst_VOP1__V_FREXP_EXP_I32_F64
+
+    // --- description from .arch file ---
+    // See V_FREXP_EXP_I32_F32.
+    void
+    Inst_VOP1__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
+        VecOperandI32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // Inf/NaN yield 0; otherwise return the frexp exponent such that
+        // src = mantissa * 2^exp with mantissa in [0.5, 1).
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
+                    vdst[lane] = 0;
+                } else {
+                    VecElemI32 exp = 0;
+                    std::frexp(src[lane], &exp);
+                    vdst[lane] = exp;
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_FREXP_MANT_F64 class methods ---
+
+    Inst_VOP1__V_FREXP_MANT_F64::Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_frexp_mant_f64")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_FREXP_MANT_F64
+
+    Inst_VOP1__V_FREXP_MANT_F64::~Inst_VOP1__V_FREXP_MANT_F64()
+    {
+    } // ~Inst_VOP1__V_FREXP_MANT_F64
+
+    // --- description from .arch file ---
+    // See V_FREXP_MANT_F32.
+    void
+    Inst_VOP1__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
+        VecOperandF64 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // Inf/NaN pass through; otherwise return the frexp mantissa
+        // (magnitude in [0.5, 1)); the exponent is discarded.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
+                    vdst[lane] = src[lane];
+                } else {
+                    VecElemI32 exp(0);
+                    vdst[lane] = std::frexp(src[lane], &exp);
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_FRACT_F64 class methods ---
+
+    Inst_VOP1__V_FRACT_F64::Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_fract_f64")
+    {
+        setFlag(ALU);
+        setFlag(F64);
+    } // Inst_VOP1__V_FRACT_F64
+
+    Inst_VOP1__V_FRACT_F64::~Inst_VOP1__V_FRACT_F64()
+    {
+    } // ~Inst_VOP1__V_FRACT_F64
+
+    // --- description from .arch file ---
+    // See V_FRACT_F32.
+    void
+    Inst_VOP1__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
+        VecOperandF64 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // Per V_FRACT_F32: D = S0 - floor(S0). std::modf would be wrong
+        // here for negative inputs because its fractional part carries
+        // the sign of the input (e.g. fract(-1.25) must be 0.75, not
+        // -0.25).
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = src[lane] - std::floor(src[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_FREXP_EXP_I32_F32 class methods ---
+
+    Inst_VOP1__V_FREXP_EXP_I32_F32::Inst_VOP1__V_FREXP_EXP_I32_F32(
+        InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_frexp_exp_i32_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_FREXP_EXP_I32_F32
+
+    Inst_VOP1__V_FREXP_EXP_I32_F32::~Inst_VOP1__V_FREXP_EXP_I32_F32()
+    {
+    } // ~Inst_VOP1__V_FREXP_EXP_I32_F32
+
+    // --- description from .arch file ---
+    // if (S0.f == INF || S0.f == NAN) then D.i = 0;
+    // else D.i = TwosComplement(Exponent(S0.f) - 127 + 1).
+    // Returns exponent of single precision float input, such that S0.f =
+    // significand * (2 ** exponent). See also FREXP_MANT_F32, which returns
+    // the significand.
+    void
+    Inst_VOP1__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandI32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // Inf/NaN yield 0; otherwise return the frexp exponent such that
+        // src = mantissa * 2^exp with mantissa in [0.5, 1).
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
+                    vdst[lane] = 0;
+                } else {
+                    VecElemI32 exp(0);
+                    std::frexp(src[lane], &exp);
+                    vdst[lane] = exp;
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_FREXP_MANT_F32 class methods ---
+
+    Inst_VOP1__V_FREXP_MANT_F32::Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_frexp_mant_f32")
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP1__V_FREXP_MANT_F32
+
+    Inst_VOP1__V_FREXP_MANT_F32::~Inst_VOP1__V_FREXP_MANT_F32()
+    {
+    } // ~Inst_VOP1__V_FREXP_MANT_F32
+
+    // --- description from .arch file ---
+    // if (S0.f == INF || S0.f == NAN) then D.f = S0.f;
+    // else D.f = Mantissa(S0.f).
+    // Result range is in (-1.0,-0.5][0.5,1.0) in normal cases. Returns binary
+    // --- significand of single precision float input, such that S0.f =
+    // --- significand * (2 ** exponent). See also FREXP_EXP_I32_F32, which
+    // --- returns integer exponent.
+    void
+    Inst_VOP1__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // Inf/NaN pass through; otherwise return the frexp mantissa
+        // (magnitude in [0.5, 1)); the exponent is discarded.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
+                    vdst[lane] = src[lane];
+                } else {
+                    VecElemI32 exp(0);
+                    vdst[lane] = std::frexp(src[lane], &exp);
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CLREXCP class methods ---
+
+    Inst_VOP1__V_CLREXCP::Inst_VOP1__V_CLREXCP(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_clrexcp")
+    {
+        setFlag(ALU);
+    } // Inst_VOP1__V_CLREXCP
+
+    Inst_VOP1__V_CLREXCP::~Inst_VOP1__V_CLREXCP()
+    {
+    } // ~Inst_VOP1__V_CLREXCP
+
+    // --- description from .arch file ---
+    // Clear wave's exception state in SIMD (SP).
+    // Not modeled by the simulator.
+    void
+    Inst_VOP1__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+    // --- Inst_VOP1__V_MOV_B64 class methods ---
+
+    Inst_VOP1__V_MOV_B64::Inst_VOP1__V_MOV_B64(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_mov_b64")
+    {
+        setFlag(ALU);
+    } // Inst_VOP1__V_MOV_B64
+
+    Inst_VOP1__V_MOV_B64::~Inst_VOP1__V_MOV_B64()
+    {
+    } // ~Inst_VOP1__V_MOV_B64
+
+    // --- description from .arch file ---
+    // D.u = S0.u.
+    // Input and output modifiers not supported; this is an untyped operation.
+    void
+    Inst_VOP1__V_MOV_B64::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU64 src(gpuDynInst, instData.SRC0);
+        VecOperandU64 vdst(gpuDynInst, instData.VDST);
+
+        src.readSrc();
+
+        // 64-bit moves do not support DPP/SDWA encodings in this model.
+        panic_if(isDPPInst(), "DPP unimplemented for v_mov_b64");
+        panic_if(isSDWAInst(), "SDWA unimplemented for v_mov_b64");
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = src[lane];
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_CVT_F16_U16 class methods ---
+
+    Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_f16_u16")
+    {
+        setFlag(ALU);
+        setFlag(F16);
+    } // Inst_VOP1__V_CVT_F16_U16
+
+    Inst_VOP1__V_CVT_F16_U16::~Inst_VOP1__V_CVT_F16_U16()
+    {
+    } // ~Inst_VOP1__V_CVT_F16_U16
+
+    // --- description from .arch file ---
+    // D.f16 = uint16_to_flt16(S.u16).
+    // Supports denormals, rounding, exception flags and saturation.
+    // F16 VOP1 operations are not implemented in this model.
+    void
+    Inst_VOP1__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+    // --- Inst_VOP1__V_CVT_F16_I16 class methods ---
+
+    Inst_VOP1__V_CVT_F16_I16::Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_f16_i16")
+    {
+        setFlag(ALU);
+        setFlag(F16);
+    } // Inst_VOP1__V_CVT_F16_I16
+
+    Inst_VOP1__V_CVT_F16_I16::~Inst_VOP1__V_CVT_F16_I16()
+    {
+    } // ~Inst_VOP1__V_CVT_F16_I16
+
+    // --- description from .arch file ---
+    // D.f16 = int16_to_flt16(S.i16).
+    // Supports denormals, rounding, exception flags and saturation.
+    void
+    Inst_VOP1__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+    // --- Inst_VOP1__V_CVT_U16_F16 class methods ---
+
+    Inst_VOP1__V_CVT_U16_F16::Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_u16_f16")
+    {
+        setFlag(ALU);
+        setFlag(F16);
+    } // Inst_VOP1__V_CVT_U16_F16
+
+    Inst_VOP1__V_CVT_U16_F16::~Inst_VOP1__V_CVT_U16_F16()
+    {
+    } // ~Inst_VOP1__V_CVT_U16_F16
+
+    // --- description from .arch file ---
+    // D.u16 = flt16_to_uint16(S.f16).
+    // Supports rounding, exception flags and saturation.
+    void
+    Inst_VOP1__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+    // --- Inst_VOP1__V_CVT_I16_F16 class methods ---
+
+    Inst_VOP1__V_CVT_I16_F16::Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_cvt_i16_f16")
+    {
+        setFlag(ALU);
+        setFlag(F16);
+    } // Inst_VOP1__V_CVT_I16_F16
+
+    Inst_VOP1__V_CVT_I16_F16::~Inst_VOP1__V_CVT_I16_F16()
+    {
+    } // ~Inst_VOP1__V_CVT_I16_F16
+
+    // --- description from .arch file ---
+    // D.i16 = flt16_to_int16(S.f16).
+    // Supports rounding, exception flags and saturation.
+    void
+    Inst_VOP1__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        panicUnimplemented();
+    } // execute
+    // --- Inst_VOP1__V_RCP_F16 class methods ---
+
+    Inst_VOP1__V_RCP_F16::Inst_VOP1__V_RCP_F16(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_rcp_f16")
+    {
+        setFlag(ALU);
+        setFlag(F16);
+    } // Inst_VOP1__V_RCP_F16
+
+    Inst_VOP1__V_RCP_F16::~Inst_VOP1__V_RCP_F16()
+    {
+    } // ~Inst_VOP1__V_RCP_F16
+
+    // --- description from .arch file ---
+    // if (S0.f16 == 1.0f)
+    //     D.f16 = 1.0f;
+    // else
+    //     D.f16 = ApproximateRecip(S0.f16).
+ void + Inst_VOP1__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_SQRT_F16 class methods --- + + Inst_VOP1__V_SQRT_F16::Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_sqrt_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_SQRT_F16 + + Inst_VOP1__V_SQRT_F16::~Inst_VOP1__V_SQRT_F16() + { + } // ~Inst_VOP1__V_SQRT_F16 + + // --- description from .arch file --- + // if (S0.f16 == 1.0f) + // D.f16 = 1.0f; + // else + // D.f16 = ApproximateSqrt(S0.f16). + void + Inst_VOP1__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_RSQ_F16 class methods --- + + Inst_VOP1__V_RSQ_F16::Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_rsq_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_RSQ_F16 + + Inst_VOP1__V_RSQ_F16::~Inst_VOP1__V_RSQ_F16() + { + } // ~Inst_VOP1__V_RSQ_F16 + + // --- description from .arch file --- + // if (S0.f16 == 1.0f) + // D.f16 = 1.0f; + // else + // D.f16 = ApproximateRecipSqrt(S0.f16). + void + Inst_VOP1__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_LOG_F16 class methods --- + + Inst_VOP1__V_LOG_F16::Inst_VOP1__V_LOG_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_log_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_LOG_F16 + + Inst_VOP1__V_LOG_F16::~Inst_VOP1__V_LOG_F16() + { + } // ~Inst_VOP1__V_LOG_F16 + + // --- description from .arch file --- + // if (S0.f16 == 1.0f) + // D.f16 = 0.0f; + // else + // D.f16 = ApproximateLog2(S0.f16). 
+ void + Inst_VOP1__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_EXP_F16 class methods --- + + Inst_VOP1__V_EXP_F16::Inst_VOP1__V_EXP_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_exp_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_EXP_F16 + + Inst_VOP1__V_EXP_F16::~Inst_VOP1__V_EXP_F16() + { + } // ~Inst_VOP1__V_EXP_F16 + + // --- description from .arch file --- + // if (S0.f16 == 0.0f) + // D.f16 = 1.0f; + // else + // D.f16 = Approximate2ToX(S0.f16). + void + Inst_VOP1__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_FREXP_MANT_F16 class methods --- + + Inst_VOP1__V_FREXP_MANT_F16::Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_frexp_mant_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_FREXP_MANT_F16 + + Inst_VOP1__V_FREXP_MANT_F16::~Inst_VOP1__V_FREXP_MANT_F16() + { + } // ~Inst_VOP1__V_FREXP_MANT_F16 + + // --- description from .arch file --- + // if (S0.f16 == +-INF || S0.f16 == NAN) + // D.f16 = S0.f16; + // else + // D.f16 = mantissa(S0.f16). + // Result range is (-1.0,-0.5][0.5,1.0). + // C math library frexp function. + // Returns binary significand of half precision float input, such that the + // original single float = significand * (2 ** exponent). 
+ void + Inst_VOP1__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_FREXP_EXP_I16_F16 class methods --- + + Inst_VOP1__V_FREXP_EXP_I16_F16::Inst_VOP1__V_FREXP_EXP_I16_F16( + InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_frexp_exp_i16_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_FREXP_EXP_I16_F16 + + Inst_VOP1__V_FREXP_EXP_I16_F16::~Inst_VOP1__V_FREXP_EXP_I16_F16() + { + } // ~Inst_VOP1__V_FREXP_EXP_I16_F16 + + // --- description from .arch file --- + // if (S0.f16 == +-INF || S0.f16 == NAN) + // D.i16 = 0; + // else + // D.i16 = 2s_complement(exponent(S0.f16) - 15 + 1). + // C math library frexp function. + // Returns exponent of half precision float input, such that the + // original single float = significand * (2 ** exponent). + void + Inst_VOP1__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_FLOOR_F16 class methods --- + + Inst_VOP1__V_FLOOR_F16::Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_floor_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_FLOOR_F16 + + Inst_VOP1__V_FLOOR_F16::~Inst_VOP1__V_FLOOR_F16() + { + } // ~Inst_VOP1__V_FLOOR_F16 + + // --- description from .arch file --- + // D.f16 = trunc(S0.f16); + // if (S0.f16 < 0.0f && S0.f16 != D.f16) then D.f16 -= 1.0f. + void + Inst_VOP1__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_CEIL_F16 class methods --- + + Inst_VOP1__V_CEIL_F16::Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_ceil_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_CEIL_F16 + + Inst_VOP1__V_CEIL_F16::~Inst_VOP1__V_CEIL_F16() + { + } // ~Inst_VOP1__V_CEIL_F16 + + // --- description from .arch file --- + // D.f16 = trunc(S0.f16); + // if (S0.f16 > 0.0f && S0.f16 != D.f16) then D.f16 += 1.0f. 
+ void + Inst_VOP1__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_TRUNC_F16 class methods --- + + Inst_VOP1__V_TRUNC_F16::Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_trunc_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_TRUNC_F16 + + Inst_VOP1__V_TRUNC_F16::~Inst_VOP1__V_TRUNC_F16() + { + } // ~Inst_VOP1__V_TRUNC_F16 + + // --- description from .arch file --- + // D.f16 = trunc(S0.f16). + // Round-to-zero semantics. + void + Inst_VOP1__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_RNDNE_F16 class methods --- + + Inst_VOP1__V_RNDNE_F16::Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_rndne_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_RNDNE_F16 + + Inst_VOP1__V_RNDNE_F16::~Inst_VOP1__V_RNDNE_F16() + { + } // ~Inst_VOP1__V_RNDNE_F16 + + // --- description from .arch file --- + // D.f16 = FLOOR(S0.f16 + 0.5f); + // if (floor(S0.f16) is even && fract(S0.f16) == 0.5f) then D.f16 -= 1.0f. + // Round-to-nearest-even semantics. + void + Inst_VOP1__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_FRACT_F16 class methods --- + + Inst_VOP1__V_FRACT_F16::Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_fract_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_FRACT_F16 + + Inst_VOP1__V_FRACT_F16::~Inst_VOP1__V_FRACT_F16() + { + } // ~Inst_VOP1__V_FRACT_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 + -floor(S0.f16). 
+ void + Inst_VOP1__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_SIN_F16 class methods --- + + Inst_VOP1__V_SIN_F16::Inst_VOP1__V_SIN_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_sin_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_SIN_F16 + + Inst_VOP1__V_SIN_F16::~Inst_VOP1__V_SIN_F16() + { + } // ~Inst_VOP1__V_SIN_F16 + + // --- description from .arch file --- + // D.f16 = sin(S0.f16 * 2 * PI). + void + Inst_VOP1__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_COS_F16 class methods --- + + Inst_VOP1__V_COS_F16::Inst_VOP1__V_COS_F16(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_cos_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP1__V_COS_F16 + + Inst_VOP1__V_COS_F16::~Inst_VOP1__V_COS_F16() + { + } // ~Inst_VOP1__V_COS_F16 + + // --- description from .arch file --- + // D.f16 = cos(S0.f16 * 2 * PI). + void + Inst_VOP1__V_COS_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP1__V_EXP_LEGACY_F32 class methods --- + + Inst_VOP1__V_EXP_LEGACY_F32::Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_exp_legacy_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP1__V_EXP_LEGACY_F32 + + Inst_VOP1__V_EXP_LEGACY_F32::~Inst_VOP1__V_EXP_LEGACY_F32() + { + } // ~Inst_VOP1__V_EXP_LEGACY_F32 + + // --- description from .arch file --- + // D.f = pow(2.0, S0.f) with legacy semantics. 
    // D.f = 2 ** S0.f, per lane. Computed in double via std::pow and
    // narrowed to f32 on assignment to vdst.
    void
    Inst_VOP1__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::pow(2.0, src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_LOG_LEGACY_F32 class methods ---

    Inst_VOP1__V_LOG_LEGACY_F32::Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_log_legacy_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_LOG_LEGACY_F32

    Inst_VOP1__V_LOG_LEGACY_F32::~Inst_VOP1__V_LOG_LEGACY_F32()
    {
    } // ~Inst_VOP1__V_LOG_LEGACY_F32

    // --- description from .arch file ---
    // D.f = log2(S0.f). Base 2 logarithm with legacy semantics.
    void
    Inst_VOP1__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::log2(src[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP1__V_ACCVGPR_MOV_B32 class methods ---

    Inst_VOP1__V_ACCVGPR_MOV_B32::
        Inst_VOP1__V_ACCVGPR_MOV_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_accvgpr_mov_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_ACCVGPR_MOV_B32

    Inst_VOP1__V_ACCVGPR_MOV_B32::~Inst_VOP1__V_ACCVGPR_MOV_B32()
    {
    } // ~Inst_VOP1__V_ACCVGPR_MOV_B32

    // Per-lane 32-bit move between accumulation VGPRs: both operand
    // indices are offset by the wavefront's accumOffset into the ACC
    // region of the register file.
    void
    Inst_VOP1__V_ACCVGPR_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        unsigned accum_offset = wf->accumOffset;

        ConstVecOperandU32 src(gpuDynInst, instData.SRC0+accum_offset);
        VecOperandU32 vdst(gpuDynInst, instData.VDST+accum_offset);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src[lane];
            }
        }

        vdst.write();
    } // execute
} // namespace VegaISA
} // namespace gem5
diff --git a/src/arch/amdgpu/vega/insts/vop2.cc b/src/arch/amdgpu/vega/insts/vop2.cc
new file mode 100644
index 0000000000..55146711b6
--- /dev/null
+++ b/src/arch/amdgpu/vega/insts/vop2.cc
@@ -0,0 +1,2221 @@
/*
 * Copyright (c) 2024 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "arch/amdgpu/vega/insts/inst_util.hh"
#include "arch/amdgpu/vega/insts/instructions.hh"
#include "debug/VEGA.hh"

namespace gem5
{

namespace VegaISA
{
    // --- Inst_VOP2__V_CNDMASK_B32 class methods ---

    Inst_VOP2__V_CNDMASK_B32::Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_cndmask_b32")
    {
        setFlag(ALU);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_CNDMASK_B32

    Inst_VOP2__V_CNDMASK_B32::~Inst_VOP2__V_CNDMASK_B32()
    {
    } // ~Inst_VOP2__V_CNDMASK_B32

    // --- description from .arch file ---
    // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
    // as a scalar GPR in S2.
    void
    Inst_VOP2__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        // Bit `lane` of VCC selects src1 for that lane, else src0.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_ADD_F32 class methods ---

    Inst_VOP2__V_ADD_F32::Inst_VOP2__V_ADD_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_ADD_F32

    Inst_VOP2__V_ADD_F32::~Inst_VOP2__V_ADD_F32()
    {
    } // ~Inst_VOP2__V_ADD_F32

    // --- description from .arch file ---
    // D.f = S0.f + S1.f.
    // D.f = S0.f + S1.f per lane, with optional DPP cross-lane source
    // modification of SRC0.
    void
    Inst_VOP2__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        // src1 is non-const: processDPP below takes it by mutable
        // reference — presumably so SRC1 modifiers can be applied; confirm
        // against processDPP's signature.
        VecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isDPPInst()) {
            // SRC0 is re-read from the DPP extended encoding, then the DPP
            // control (lane swizzle/masks) is applied before the add.
            VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src0_dpp.read();

            DPRINTF(VEGA, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BC,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_dpp[lane] + src1[lane];
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] + src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_SUB_F32 class methods ---

    Inst_VOP2__V_SUB_F32::Inst_VOP2__V_SUB_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_SUB_F32

    Inst_VOP2__V_SUB_F32::~Inst_VOP2__V_SUB_F32()
    {
    } // ~Inst_VOP2__V_SUB_F32

    // --- description from .arch file ---
    // D.f = S0.f - S1.f.
    // SQ translates to V_ADD_F32.
+ void + Inst_VOP2__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_SUBREV_F32 class methods --- + + Inst_VOP2__V_SUBREV_F32::Inst_VOP2__V_SUBREV_F32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_subrev_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP2__V_SUBREV_F32 + + Inst_VOP2__V_SUBREV_F32::~Inst_VOP2__V_SUBREV_F32() + { + } // ~Inst_VOP2__V_SUBREV_F32 + + // --- description from .arch file --- + // D.f = S1.f - S0.f. + // SQ translates to V_ADD_F32. + void + Inst_VOP2__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] - src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MUL_LEGACY_F32 class methods --- + + Inst_VOP2__V_MUL_LEGACY_F32::Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mul_legacy_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP2__V_MUL_LEGACY_F32 + + Inst_VOP2__V_MUL_LEGACY_F32::~Inst_VOP2__V_MUL_LEGACY_F32() + { + } // ~Inst_VOP2__V_MUL_LEGACY_F32 + + // --- description from .arch file --- + // D.f = S0.f * S1.f (DX9 rules, 0.0*x = 0.0). 
    // Plain IEEE multiply per active lane.
    // NOTE(review): the DX9 "legacy" special case (0.0 * x == 0.0 even for
    // x == inf/NaN) from the arch description is not modeled here — this
    // body is a straight multiply; confirm whether that is intentional.
    void
    Inst_VOP2__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MUL_F32 class methods ---

    Inst_VOP2__V_MUL_F32::Inst_VOP2__V_MUL_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MUL_F32

    Inst_VOP2__V_MUL_F32::~Inst_VOP2__V_MUL_F32()
    {
    } // ~Inst_VOP2__V_MUL_F32

    // --- description from .arch file ---
    // D.f = S0.f * S1.f.
    // Multiply with explicit handling of the IEEE special cases. Note
    // that subnormal sources are treated the same as zero throughout
    // (flush-to-zero behavior).
    void
    Inst_VOP2__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    // NaN in either source propagates.
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    // +0 (or +denorm) times inf is NaN; otherwise the
                    // result is a zero whose sign follows src1.
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    // -0 (or -denorm) times inf is NaN; otherwise a zero
                    // with the product's sign.
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    // +inf times zero/denorm is NaN; otherwise an infinity
                    // with the product's sign.
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    // -inf times zero/denorm is NaN; otherwise an infinity
                    // with the product's sign.
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    // Finite, non-zero src0: ordinary multiply.
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MUL_I32_I24 class methods ---

    Inst_VOP2__V_MUL_I32_I24::Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_i32_i24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_I32_I24

    Inst_VOP2__V_MUL_I32_I24::~Inst_VOP2__V_MUL_I32_I24()
    {
    } // ~Inst_VOP2__V_MUL_I32_I24

    // --- description from .arch file ---
    // D.i = S0.i[23:0] * S1.i[23:0].
    // Signed 24-bit multiply: low 24 bits of each source are
    // sign-extended before the 32-bit product is formed.
    void
    Inst_VOP2__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
                    * sext<24>(bits(src1[lane], 23, 0));
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MUL_HI_I32_I24 class methods ---

    Inst_VOP2__V_MUL_HI_I32_I24::Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_hi_i32_i24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_HI_I32_I24

    Inst_VOP2__V_MUL_HI_I32_I24::~Inst_VOP2__V_MUL_HI_I32_I24()
    {
    } // ~Inst_VOP2__V_MUL_HI_I32_I24

    // --- description from .arch file ---
    // D.i = (S0.i[23:0] * S1.i[23:0])>>32.
+ void + Inst_VOP2__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemI64 tmp_src0 + = (VecElemI64)sext<24>(bits(src0[lane], 23, 0)); + VecElemI64 tmp_src1 + = (VecElemI64)sext<24>(bits(src1[lane], 23, 0)); + + vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MUL_U32_U24 class methods --- + + Inst_VOP2__V_MUL_U32_U24::Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mul_u32_u24") + { + setFlag(ALU); + } // Inst_VOP2__V_MUL_U32_U24 + + Inst_VOP2__V_MUL_U32_U24::~Inst_VOP2__V_MUL_U32_U24() + { + } // ~Inst_VOP2__V_MUL_U32_U24 + + // --- description from .arch file --- + // D.u = S0.u[23:0] * S1.u[23:0]. + void + Inst_VOP2__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst) + { + auto opImpl = [](VecOperandU32& src0, VecOperandU32& src1, + VecOperandU32& vdst, Wavefront* wf) { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = bits(src0[lane], 23, 0) * + bits(src1[lane], 23, 0); + } + } + }; + + vop2Helper(gpuDynInst, opImpl); + } // execute + // --- Inst_VOP2__V_MUL_HI_U32_U24 class methods --- + + Inst_VOP2__V_MUL_HI_U32_U24::Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mul_hi_u32_u24") + { + setFlag(ALU); + } // Inst_VOP2__V_MUL_HI_U32_U24 + + Inst_VOP2__V_MUL_HI_U32_U24::~Inst_VOP2__V_MUL_HI_U32_U24() + { + } // ~Inst_VOP2__V_MUL_HI_U32_U24 + + // --- description from .arch file --- + // D.i = (S0.u[23:0] * S1.u[23:0])>>32. 
+ void + Inst_VOP2__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); + VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0); + vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MIN_F32 class methods --- + + Inst_VOP2__V_MIN_F32::Inst_VOP2__V_MIN_F32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_min_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP2__V_MIN_F32 + + Inst_VOP2__V_MIN_F32::~Inst_VOP2__V_MIN_F32() + { + } // ~Inst_VOP2__V_MIN_F32 + + // --- description from .arch file --- + // D.f = (S0.f < S1.f ? S0.f : S1.f). + void + Inst_VOP2__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fmin(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MAX_F32 class methods --- + + Inst_VOP2__V_MAX_F32::Inst_VOP2__V_MAX_F32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_max_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP2__V_MAX_F32 + + Inst_VOP2__V_MAX_F32::~Inst_VOP2__V_MAX_F32() + { + } // ~Inst_VOP2__V_MAX_F32 + + // --- description from .arch file --- + // D.f = (S0.f >= S1.f ? S0.f : S1.f). 
+ void + Inst_VOP2__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fmax(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MIN_I32 class methods --- + + Inst_VOP2__V_MIN_I32::Inst_VOP2__V_MIN_I32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_min_i32") + { + setFlag(ALU); + } // Inst_VOP2__V_MIN_I32 + + Inst_VOP2__V_MIN_I32::~Inst_VOP2__V_MIN_I32() + { + } // ~Inst_VOP2__V_MIN_I32 + + // --- description from .arch file --- + // D.i = min(S0.i, S1.i). + void + Inst_VOP2__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::min(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MAX_I32 class methods --- + + Inst_VOP2__V_MAX_I32::Inst_VOP2__V_MAX_I32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_max_i32") + { + setFlag(ALU); + } // Inst_VOP2__V_MAX_I32 + + Inst_VOP2__V_MAX_I32::~Inst_VOP2__V_MAX_I32() + { + } // ~Inst_VOP2__V_MAX_I32 + + // --- description from .arch file --- + // D.i = max(S0.i, S1.i). 
+ void + Inst_VOP2__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::max(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MIN_U32 class methods --- + + Inst_VOP2__V_MIN_U32::Inst_VOP2__V_MIN_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_min_u32") + { + setFlag(ALU); + } // Inst_VOP2__V_MIN_U32 + + Inst_VOP2__V_MIN_U32::~Inst_VOP2__V_MIN_U32() + { + } // ~Inst_VOP2__V_MIN_U32 + + // --- description from .arch file --- + // D.u = min(S0.u, S1.u). + void + Inst_VOP2__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::min(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MAX_U32 class methods --- + + Inst_VOP2__V_MAX_U32::Inst_VOP2__V_MAX_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_max_u32") + { + setFlag(ALU); + } // Inst_VOP2__V_MAX_U32 + + Inst_VOP2__V_MAX_U32::~Inst_VOP2__V_MAX_U32() + { + } // ~Inst_VOP2__V_MAX_U32 + + // --- description from .arch file --- + // D.u = max(S0.u, S1.u). 
+ void + Inst_VOP2__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::max(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_LSHRREV_B32 class methods --- + + Inst_VOP2__V_LSHRREV_B32::Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_lshrrev_b32") + { + setFlag(ALU); + } // Inst_VOP2__V_LSHRREV_B32 + + Inst_VOP2__V_LSHRREV_B32::~Inst_VOP2__V_LSHRREV_B32() + { + } // ~Inst_VOP2__V_LSHRREV_B32 + + // --- description from .arch file --- + // D.u = S1.u >> S0.u[4:0]. + // The vacated bits are set to zero. + // SQ translates this to an internal SP opcode. + void + Inst_VOP2__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_ASHRREV_I32 class methods --- + + Inst_VOP2__V_ASHRREV_I32::Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_ashrrev_i32") + { + setFlag(ALU); + } // Inst_VOP2__V_ASHRREV_I32 + + Inst_VOP2__V_ASHRREV_I32::~Inst_VOP2__V_ASHRREV_I32() + { + } // ~Inst_VOP2__V_ASHRREV_I32 + + // --- description from .arch file --- + // D.i = signext(S1.i) >> S0.i[4:0]. + // The vacated bits are set to the sign bit of the input value. + // SQ translates this to an internal SP opcode. 
    // Arithmetic shift right with reversed operands: S1 is shifted by the
    // low five bits of S0. The right shift of a signed value relies on the
    // implementation's sign-propagating behavior.
    void
    Inst_VOP2__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_LSHLREV_B32 class methods ---

    Inst_VOP2__V_LSHLREV_B32::Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshlrev_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHLREV_B32

    Inst_VOP2__V_LSHLREV_B32::~Inst_VOP2__V_LSHLREV_B32()
    {
    } // ~Inst_VOP2__V_LSHLREV_B32

    // --- description from .arch file ---
    // D.u = S1.u << S0.u[4:0].
    // SQ translates this to an internal SP opcode.
    // Shift left with reversed operands, with optional SDWA sub-dword
    // selection applied to the sources and destination.
    void
    Inst_VOP2__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and vdst during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(VEGA, "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register "
                    "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: "
                    "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
                    "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_U,
                    extData.iFmt_VOP_SDWA.CLMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0);
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_AND_B32 class methods ---

    Inst_VOP2__V_AND_B32::Inst_VOP2__V_AND_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_and_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_AND_B32

    Inst_VOP2__V_AND_B32::~Inst_VOP2__V_AND_B32()
    {
    } // ~Inst_VOP2__V_AND_B32

    // --- description from .arch file ---
    // D.u = S0.u & S1.u.
    // Input and output modifiers not supported.
    // Bitwise AND, with optional DPP cross-lane modification of SRC0.
    void
    Inst_VOP2__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isDPPInst()) {
            VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src0_dpp.read();

            DPRINTF(VEGA, "Handling V_AND_B32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BC,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_dpp[lane] & src1[lane];
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] & src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_OR_B32 class methods ---

    Inst_VOP2__V_OR_B32::Inst_VOP2__V_OR_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_or_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_OR_B32

    Inst_VOP2__V_OR_B32::~Inst_VOP2__V_OR_B32()
    {
    } // ~Inst_VOP2__V_OR_B32

    // --- description from .arch file ---
    // D.u = S0.u | S1.u.
    // Input and output modifiers not supported.
    // Bitwise OR, with optional SDWA sub-dword selection on sources and
    // destination.
    void
    Inst_VOP2__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and dest during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(VEGA, "Handling V_OR_B32 SRC SDWA. SRC0: register v[%d], "
                    "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
                    "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
                    "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_U,
                    extData.iFmt_VOP_SDWA.CLMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_sdwa[lane] | src1[lane];
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] | src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_XOR_B32 class methods ---

    Inst_VOP2__V_XOR_B32::Inst_VOP2__V_XOR_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_xor_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_XOR_B32

    Inst_VOP2__V_XOR_B32::~Inst_VOP2__V_XOR_B32()
    {
    } // ~Inst_VOP2__V_XOR_B32

    // --- description from .arch file ---
    // D.u = S0.u ^ S1.u.
    // Input and output modifiers not supported.
    // V_XOR_B32: per-lane bitwise XOR of the two 32-bit sources.
    void
    Inst_VOP2__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] ^ src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MAC_F32 class methods ---

    Inst_VOP2__V_MAC_F32::Inst_VOP2__V_MAC_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mac_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAC);
    } // Inst_VOP2__V_MAC_F32

    Inst_VOP2__V_MAC_F32::~Inst_VOP2__V_MAC_F32()
    {
    } // ~Inst_VOP2__V_MAC_F32

    // --- description from .arch file ---
    // D.f = S0.f * S1.f + D.f.
    // SQ translates to V_MAD_F32.
    void
    Inst_VOP2__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        VecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();
        // MAC reads the destination as the accumulator input.
        vdst.read();

        if (isDPPInst()) {
            // DPP variant: src0 is permuted across lanes per DPP_CTRL
            // before the fused multiply-add.
            VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src0_dpp.read();

            DPRINTF(VEGA, "Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BC,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    // std::fma gives a single-rounding multiply-add.
                    vdst[lane] = std::fma(src0_dpp[lane], src1[lane],
                                          vdst[lane]);
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
                }
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_MADMK_F32 class methods ---

    Inst_VOP2__V_MADMK_F32::Inst_VOP2__V_MADMK_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madmk_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP2__V_MADMK_F32

    Inst_VOP2__V_MADMK_F32::~Inst_VOP2__V_MADMK_F32()
    {
    } // ~Inst_VOP2__V_MADMK_F32

    // --- description from .arch file ---
    // D.f = S0.f * K + S1.f; K is a 32-bit inline constant.
    // This opcode cannot use the VOP3 encoding and cannot use input/output
    // --- modifiers.
    // SQ translates to V_MAD_F32.
+ void + Inst_VOP2__V_MADMK_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + VecElemF32 k = extData.imm_f32; + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fma(src0[lane], k, src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MADAK_F32 class methods --- + + Inst_VOP2__V_MADAK_F32::Inst_VOP2__V_MADAK_F32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_madak_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(MAD); + } // Inst_VOP2__V_MADAK_F32 + + Inst_VOP2__V_MADAK_F32::~Inst_VOP2__V_MADAK_F32() + { + } // ~Inst_VOP2__V_MADAK_F32 + + // --- description from .arch file --- + // D.f = S0.f * S1.f + K; K is a 32-bit inline constant. + // This opcode cannot use the VOP3 encoding and cannot use input/output + // --- modifiers. + // SQ translates to V_MAD_F32. 
    // V_MADAK_F32: D.f = fma(S0.f, S1.f, K), where K is the 32-bit
    // literal constant that follows the instruction word.
    void
    Inst_VOP2__V_MADAK_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);
        VecElemF32 k = extData.imm_f32;

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], k);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP2__V_ADD_CO_U32 class methods ---

    Inst_VOP2__V_ADD_CO_U32::Inst_VOP2__V_ADD_CO_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP2__V_ADD_CO_U32

    Inst_VOP2__V_ADD_CO_U32::~Inst_VOP2__V_ADD_CO_U32()
    {
    } // ~Inst_VOP2__V_ADD_CO_U32

    // --- description from .arch file ---
    // D.u = S0.u + S1.u;
    // VCC[threadId] = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an UNSIGNED
    // --- overflow or carry-out for V_ADDC_U32.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP2__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            // SDWA variant: apply sub-dword source selection before the
            // add; the carry-out is computed on the selected values.
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and dest during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(VEGA, "Handling V_ADD_CO_U32 SRC SDWA. SRC0: register "
                    "v[%d], DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, "
                    "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
                    "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_U,
                    extData.iFmt_VOP_SDWA.CLMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_sdwa[lane] + src1[lane];
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                    // Carry-out: detect wrap by doing the add in 64 bits.
                    vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane]
                        + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] + src1[lane];
                    // Carry-out: detect wrap by doing the add in 64 bits.
                    vcc.setBit(lane, ((VecElemU64)src0[lane]
                        + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
                }
            }
        }

        vcc.write();
        vdst.write();
    } // execute
    // --- Inst_VOP2__V_SUB_CO_U32 class methods ---

    Inst_VOP2__V_SUB_CO_U32::Inst_VOP2__V_SUB_CO_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP2__V_SUB_CO_U32

    Inst_VOP2__V_SUB_CO_U32::~Inst_VOP2__V_SUB_CO_U32()
    {
    } // ~Inst_VOP2__V_SUB_CO_U32

    // --- description from .arch file ---
    // D.u = S0.u - S1.u;
    // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out for V_SUBB_U32.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    // V_SUB_CO_U32: per-lane unsigned subtract with borrow-out to VCC.
    void
    Inst_VOP2__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Unsigned wrap-around subtract; borrow when S1 > S0.
                vdst[lane] = src0[lane] - src1[lane];
                vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    // --- Inst_VOP2__V_SUBREV_CO_U32 class methods ---

    Inst_VOP2__V_SUBREV_CO_U32::Inst_VOP2__V_SUBREV_CO_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP2__V_SUBREV_CO_U32

    Inst_VOP2__V_SUBREV_CO_U32::~Inst_VOP2__V_SUBREV_CO_U32()
    {
    } // ~Inst_VOP2__V_SUBREV_CO_U32

    // --- description from .arch file ---
    // D.u = S1.u - S0.u;
    // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out for V_SUBB_U32.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP2__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Reversed-operand subtract; borrow when S0 > S1.
                vdst[lane] = src1[lane] - src0[lane];
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    // --- Inst_VOP2__V_ADDC_CO_U32 class methods ---

    Inst_VOP2__V_ADDC_CO_U32::Inst_VOP2__V_ADDC_CO_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_addc_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_ADDC_CO_U32

    Inst_VOP2__V_ADDC_CO_U32::~Inst_VOP2__V_ADDC_CO_U32()
    {
    } // ~Inst_VOP2__V_ADDC_CO_U32

    // --- description from .arch file ---
    // D.u = S0.u + S1.u + VCC[threadId];
    // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x800000000ULL ? 1 : 0)
    // is an UNSIGNED overflow.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP2__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Updating VCC in-loop is safe: setBit(lane) for earlier
                // lanes only touches lower bits, so bit 'lane' still holds
                // the original carry-in when read here.
                vdst[lane] = src0[lane] + src1[lane]
                    + bits(vcc.rawData(), lane);
                // Carry-out computed on the 64-bit sum of both sources
                // plus the carry-in.
                vcc.setBit(lane, ((VecElemU64)src0[lane]
                    + (VecElemU64)src1[lane]
                    + (VecElemU64)bits(vcc.rawData(), lane, lane))
                    >= 0x100000000 ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    // --- Inst_VOP2__V_SUBB_CO_U32 class methods ---

    Inst_VOP2__V_SUBB_CO_U32::Inst_VOP2__V_SUBB_CO_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subb_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_SUBB_CO_U32

    Inst_VOP2__V_SUBB_CO_U32::~Inst_VOP2__V_SUBB_CO_U32()
    {
    } // ~Inst_VOP2__V_SUBB_CO_U32

    // --- description from .arch file ---
    // D.u = S0.u - S1.u - VCC[threadId];
    // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
    // --- overflow.
+ // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC + // --- source comes from the SGPR-pair at S2.u. + void + Inst_VOP2__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + vcc.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] + = src0[lane] - src1[lane] - bits(vcc.rawData(), lane); + vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) + > src0[lane] ? 1 : 0); + } + } + + vdst.write(); + vcc.write(); + } // execute + // --- Inst_VOP2__V_SUBBREV_CO_U32 class methods --- + + Inst_VOP2__V_SUBBREV_CO_U32::Inst_VOP2__V_SUBBREV_CO_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_subbrev_co_u32") + { + setFlag(ALU); + setFlag(WritesVCC); + setFlag(ReadsVCC); + } // Inst_VOP2__V_SUBBREV_CO_U32 + + Inst_VOP2__V_SUBBREV_CO_U32::~Inst_VOP2__V_SUBBREV_CO_U32() + { + } // ~Inst_VOP2__V_SUBBREV_CO_U32 + + // --- description from .arch file --- + // D.u = S1.u - S0.u - VCC[threadId]; + // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED + // overflow. + // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC + // source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32. + // SQ translates this to V_SUBREV_U32 with reversed operands. 
+ void + Inst_VOP2__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + vcc.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] + = src1[lane] - src0[lane] - bits(vcc.rawData(), lane); + vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane)) + > src1[lane] ? 1 : 0); + } + } + + vdst.write(); + vcc.write(); + } // execute + // --- Inst_VOP2__V_ADD_F16 class methods --- + + Inst_VOP2__V_ADD_F16::Inst_VOP2__V_ADD_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_add_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP2__V_ADD_F16 + + Inst_VOP2__V_ADD_F16::~Inst_VOP2__V_ADD_F16() + { + } // ~Inst_VOP2__V_ADD_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 + S1.f16. + // Supports denormals, round mode, exception flags, saturation. + void + Inst_VOP2__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_SUB_F16 class methods --- + + Inst_VOP2__V_SUB_F16::Inst_VOP2__V_SUB_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_sub_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP2__V_SUB_F16 + + Inst_VOP2__V_SUB_F16::~Inst_VOP2__V_SUB_F16() + { + } // ~Inst_VOP2__V_SUB_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 - S1.f16. + // Supports denormals, round mode, exception flags, saturation. + // SQ translates to V_ADD_F16. 
+ void + Inst_VOP2__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_SUBREV_F16 class methods --- + + Inst_VOP2__V_SUBREV_F16::Inst_VOP2__V_SUBREV_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_subrev_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP2__V_SUBREV_F16 + + Inst_VOP2__V_SUBREV_F16::~Inst_VOP2__V_SUBREV_F16() + { + } // ~Inst_VOP2__V_SUBREV_F16 + + // --- description from .arch file --- + // D.f16 = S1.f16 - S0.f16. + // Supports denormals, round mode, exception flags, saturation. + // SQ translates to V_ADD_F16. + void + Inst_VOP2__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_MUL_F16 class methods --- + + Inst_VOP2__V_MUL_F16::Inst_VOP2__V_MUL_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mul_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP2__V_MUL_F16 + + Inst_VOP2__V_MUL_F16::~Inst_VOP2__V_MUL_F16() + { + } // ~Inst_VOP2__V_MUL_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * S1.f16. + // Supports denormals, round mode, exception flags, saturation. + void + Inst_VOP2__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_MAC_F16 class methods --- + + Inst_VOP2__V_MAC_F16::Inst_VOP2__V_MAC_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mac_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(MAC); + } // Inst_VOP2__V_MAC_F16 + + Inst_VOP2__V_MAC_F16::~Inst_VOP2__V_MAC_F16() + { + } // ~Inst_VOP2__V_MAC_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * S1.f16 + D.f16. + // Supports round mode, exception flags, saturation. + // SQ translates this to V_MAD_F16. 
+ void + Inst_VOP2__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_MADMK_F16 class methods --- + + Inst_VOP2__V_MADMK_F16::Inst_VOP2__V_MADMK_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_madmk_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(MAD); + } // Inst_VOP2__V_MADMK_F16 + + Inst_VOP2__V_MADMK_F16::~Inst_VOP2__V_MADMK_F16() + { + } // ~Inst_VOP2__V_MADMK_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * K.f16 + S1.f16; K is a 16-bit inline constant stored + // in the following literal DWORD. + // This opcode cannot use the VOP3 encoding and cannot use input/output + // modifiers. Supports round mode, exception flags, saturation. + // SQ translates this to V_MAD_F16. + void + Inst_VOP2__V_MADMK_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_MADAK_F16 class methods --- + + Inst_VOP2__V_MADAK_F16::Inst_VOP2__V_MADAK_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_madak_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(MAD); + } // Inst_VOP2__V_MADAK_F16 + + Inst_VOP2__V_MADAK_F16::~Inst_VOP2__V_MADAK_F16() + { + } // ~Inst_VOP2__V_MADAK_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * S1.f16 + K.f16; K is a 16-bit inline constant stored + // in the following literal DWORD. + // This opcode cannot use the VOP3 encoding and cannot use input/output + // modifiers. Supports round mode, exception flags, saturation. + // SQ translates this to V_MAD_F16. + void + Inst_VOP2__V_MADAK_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_ADD_U16 class methods --- + + Inst_VOP2__V_ADD_U16::Inst_VOP2__V_ADD_U16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_add_u16") + { + setFlag(ALU); + } // Inst_VOP2__V_ADD_U16 + + Inst_VOP2__V_ADD_U16::~Inst_VOP2__V_ADD_U16() + { + } // ~Inst_VOP2__V_ADD_U16 + + // --- description from .arch file --- + // D.u16 = S0.u16 + S1.u16. 
+ // Supports saturation (unsigned 16-bit integer domain). + void + Inst_VOP2__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_SUB_U16 class methods --- + + Inst_VOP2__V_SUB_U16::Inst_VOP2__V_SUB_U16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_sub_u16") + { + setFlag(ALU); + } // Inst_VOP2__V_SUB_U16 + + Inst_VOP2__V_SUB_U16::~Inst_VOP2__V_SUB_U16() + { + } // ~Inst_VOP2__V_SUB_U16 + + // --- description from .arch file --- + // D.u16 = S0.u16 - S1.u16. + // Supports saturation (unsigned 16-bit integer domain). + void + Inst_VOP2__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_SUBREV_U16 class methods --- + + Inst_VOP2__V_SUBREV_U16::Inst_VOP2__V_SUBREV_U16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_subrev_u16") + { + setFlag(ALU); + } // Inst_VOP2__V_SUBREV_U16 + + Inst_VOP2__V_SUBREV_U16::~Inst_VOP2__V_SUBREV_U16() + { + } // ~Inst_VOP2__V_SUBREV_U16 + + // --- description from .arch file --- + // D.u16 = S1.u16 - S0.u16. + // Supports saturation (unsigned 16-bit integer domain). + // SQ translates this to V_SUB_U16 with reversed operands. 
+ void + Inst_VOP2__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] - src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MUL_LO_U16 class methods --- + + Inst_VOP2__V_MUL_LO_U16::Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_mul_lo_u16") + { + setFlag(ALU); + } // Inst_VOP2__V_MUL_LO_U16 + + Inst_VOP2__V_MUL_LO_U16::~Inst_VOP2__V_MUL_LO_U16() + { + } // ~Inst_VOP2__V_MUL_LO_U16 + + // --- description from .arch file --- + // D.u16 = S0.u16 * S1.u16. + // Supports saturation (unsigned 16-bit integer domain). + void + Inst_VOP2__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] * src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_LSHLREV_B16 class methods --- + + Inst_VOP2__V_LSHLREV_B16::Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_lshlrev_b16") + { + setFlag(ALU); + } // Inst_VOP2__V_LSHLREV_B16 + + Inst_VOP2__V_LSHLREV_B16::~Inst_VOP2__V_LSHLREV_B16() + { + } // ~Inst_VOP2__V_LSHLREV_B16 + + // --- description from .arch file --- + // D.u[15:0] = S1.u[15:0] << S0.u[3:0]. + // SQ translates this to an internal SP opcode. 
+ void + Inst_VOP2__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] << bits(src0[lane], 3, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_LSHRREV_B16 class methods --- + + Inst_VOP2__V_LSHRREV_B16::Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_lshrrev_b16") + { + setFlag(ALU); + } // Inst_VOP2__V_LSHRREV_B16 + + Inst_VOP2__V_LSHRREV_B16::~Inst_VOP2__V_LSHRREV_B16() + { + } // ~Inst_VOP2__V_LSHRREV_B16 + + // --- description from .arch file --- + // D.u[15:0] = S1.u[15:0] >> S0.u[3:0]. + // The vacated bits are set to zero. + // SQ translates this to an internal SP opcode. + void + Inst_VOP2__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_ASHRREV_I16 class methods --- + + Inst_VOP2__V_ASHRREV_I16::Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_ashrrev_i16") + { + setFlag(ALU); + } // Inst_VOP2__V_ASHRREV_I16 + + Inst_VOP2__V_ASHRREV_I16::~Inst_VOP2__V_ASHRREV_I16() + { + } // ~Inst_VOP2__V_ASHRREV_I16 + + // --- description from .arch file --- + // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0]. + // The vacated bits are set to the sign bit of the input value. + // SQ translates this to an internal SP opcode. 
+ void + Inst_VOP2__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + VecOperandI16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MAX_F16 class methods --- + + Inst_VOP2__V_MAX_F16::Inst_VOP2__V_MAX_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_max_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP2__V_MAX_F16 + + Inst_VOP2__V_MAX_F16::~Inst_VOP2__V_MAX_F16() + { + } // ~Inst_VOP2__V_MAX_F16 + + // --- description from .arch file --- + // D.f16 = max(S0.f16, S1.f16). + // IEEE compliant. Supports denormals, round mode, exception flags, + // saturation. + void + Inst_VOP2__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_MIN_F16 class methods --- + + Inst_VOP2__V_MIN_F16::Inst_VOP2__V_MIN_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_min_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP2__V_MIN_F16 + + Inst_VOP2__V_MIN_F16::~Inst_VOP2__V_MIN_F16() + { + } // ~Inst_VOP2__V_MIN_F16 + + // --- description from .arch file --- + // D.f16 = min(S0.f16, S1.f16). + // IEEE compliant. Supports denormals, round mode, exception flags, + // saturation. + void + Inst_VOP2__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_MAX_U16 class methods --- + + Inst_VOP2__V_MAX_U16::Inst_VOP2__V_MAX_U16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_max_u16") + { + setFlag(ALU); + } // Inst_VOP2__V_MAX_U16 + + Inst_VOP2__V_MAX_U16::~Inst_VOP2__V_MAX_U16() + { + } // ~Inst_VOP2__V_MAX_U16 + + // --- description from .arch file --- + // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]). 
+ void + Inst_VOP2__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::max(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MAX_I16 class methods --- + + Inst_VOP2__V_MAX_I16::Inst_VOP2__V_MAX_I16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_max_i16") + { + setFlag(ALU); + } // Inst_VOP2__V_MAX_I16 + + Inst_VOP2__V_MAX_I16::~Inst_VOP2__V_MAX_I16() + { + } // ~Inst_VOP2__V_MAX_I16 + + // --- description from .arch file --- + // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]). + void + Inst_VOP2__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + VecOperandI16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::max(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MIN_U16 class methods --- + + Inst_VOP2__V_MIN_U16::Inst_VOP2__V_MIN_U16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_min_u16") + { + setFlag(ALU); + } // Inst_VOP2__V_MIN_U16 + + Inst_VOP2__V_MIN_U16::~Inst_VOP2__V_MIN_U16() + { + } // ~Inst_VOP2__V_MIN_U16 + + // --- description from .arch file --- + // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]). 
+ void + Inst_VOP2__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::min(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_MIN_I16 class methods --- + + Inst_VOP2__V_MIN_I16::Inst_VOP2__V_MIN_I16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_min_i16") + { + setFlag(ALU); + } // Inst_VOP2__V_MIN_I16 + + Inst_VOP2__V_MIN_I16::~Inst_VOP2__V_MIN_I16() + { + } // ~Inst_VOP2__V_MIN_I16 + + // --- description from .arch file --- + // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]). + void + Inst_VOP2__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + VecOperandI16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::min(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_LDEXP_F16 class methods --- + + Inst_VOP2__V_LDEXP_F16::Inst_VOP2__V_LDEXP_F16(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_ldexp_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP2__V_LDEXP_F16 + + Inst_VOP2__V_LDEXP_F16::~Inst_VOP2__V_LDEXP_F16() + { + } // ~Inst_VOP2__V_LDEXP_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * (2 ** S1.i16). 
+ void + Inst_VOP2__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP2__V_ADD_U32 class methods --- + + Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_add_u32") + { + setFlag(ALU); + } // Inst_VOP2__V_ADD_U32 + + Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32() + { + } // ~Inst_VOP2__V_ADD_U32 + + // --- description from .arch file --- + // D.u = S0.u + S1.u; + void + Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + VecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + if (isSDWAInst()) { + VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0); + // use copies of original src0, src1, and dest during selecting + VecOperandU32 origSrc0_sdwa(gpuDynInst, + extData.iFmt_VOP_SDWA.SRC0); + VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1); + VecOperandU32 origVdst(gpuDynInst, instData.VDST); + + src0_sdwa.read(); + origSrc0_sdwa.read(); + origSrc1.read(); + + DPRINTF(VEGA, "Handling V_ADD_U32 SRC SDWA. 
SRC0: register v[%d], " + "DST_SEL: %d, DST_U: %d, CLMP: %d, SRC0_SEL: %d, " + "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, " + "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n", + extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL, + extData.iFmt_VOP_SDWA.DST_U, + extData.iFmt_VOP_SDWA.CLMP, + extData.iFmt_VOP_SDWA.SRC0_SEL, + extData.iFmt_VOP_SDWA.SRC0_SEXT, + extData.iFmt_VOP_SDWA.SRC0_NEG, + extData.iFmt_VOP_SDWA.SRC0_ABS, + extData.iFmt_VOP_SDWA.SRC1_SEL, + extData.iFmt_VOP_SDWA.SRC1_SEXT, + extData.iFmt_VOP_SDWA.SRC1_NEG, + extData.iFmt_VOP_SDWA.SRC1_ABS); + + processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa, + src1, origSrc1); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0_sdwa[lane] + src1[lane]; + origVdst[lane] = vdst[lane]; // keep copy consistent + } + } + + processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst); + } else { + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_SUB_U32 class methods --- + + Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt) + : Inst_VOP2(iFmt, "v_sub_u32") + { + setFlag(ALU); + } // Inst_VOP2__V_SUB_U32 + + Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32() + { + } // ~Inst_VOP2__V_SUB_U32 + + // --- description from .arch file --- + // D.u = S0.u - S1.u; + void + Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP2__V_SUBREV_U32 class methods --- + + 
Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt)
+        : Inst_VOP2(iFmt, "v_subrev_u32")
+    {
+        setFlag(ALU);
+    } // Inst_VOP2__V_SUBREV_U32
+
+    Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32()
+    {
+    } // ~Inst_VOP2__V_SUBREV_U32
+
+    // --- description from .arch file ---
+    // D.u = S1.u - S0.u;
+    void
+    Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
+        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.read();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = src1[lane] - src0[lane];
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP2__V_FMAC_F32 class methods ---
+
+    Inst_VOP2__V_FMAC_F32::Inst_VOP2__V_FMAC_F32(InFmt_VOP2 *iFmt)
+        : Inst_VOP2(iFmt, "v_fmac_f32")
+    {
+        setFlag(ALU);
+        // NOTE(review): other F32 ops also setFlag(F32); confirm omission here
+        // is intentional.
+    } // Inst_VOP2__V_FMAC_F32
+
+    Inst_VOP2__V_FMAC_F32::~Inst_VOP2__V_FMAC_F32()
+    {
+    } // ~Inst_VOP2__V_FMAC_F32
+
+    // --- description from .arch file ---
+    // D.f = S0.f * S1.f + D.f. (comment was mis-copied from v_subrev_u32)
+    void
+    Inst_VOP2__V_FMAC_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
+        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.read();
+        vdst.read();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP2__V_XNOR_B32 class methods ---
+
+    Inst_VOP2__V_XNOR_B32::Inst_VOP2__V_XNOR_B32(InFmt_VOP2 *iFmt)
+        : Inst_VOP2(iFmt, "v_xnor_b32")
+    {
+        setFlag(ALU);
+    } // Inst_VOP2__V_XNOR_B32
+
+    Inst_VOP2__V_XNOR_B32::~Inst_VOP2__V_XNOR_B32()
+    {
+    } // ~Inst_VOP2__V_XNOR_B32
+
+    // --- description from .arch file ---
+    // D.u = ~(S0.u ^ S1.u). (comment was mis-copied from v_subrev_u32)
+    void
+    Inst_VOP2__V_XNOR_B32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
+        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.read();
+        vdst.read();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = ~(src0[lane] ^ src1[lane]);
+            }
+        }
+
+        vdst.write();
+    } // execute
+} // namespace VegaISA
+} // namespace gem5
diff --git a/src/arch/amdgpu/vega/insts/vop3.cc b/src/arch/amdgpu/vega/insts/vop3.cc
new file mode 100644
index 0000000000..b9fee17353
--- /dev/null
+++ b/src/arch/amdgpu/vega/insts/vop3.cc
@@ -0,0 +1,9109 @@
+/*
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ *    contributors may be used to endorse or promote products derived from this
+ *    software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/common/dtype/mxfp_types.hh" +#include "arch/amdgpu/vega/insts/inst_util.hh" +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_VOP3__V_CNDMASK_B32 class methods --- + + Inst_VOP3__V_CNDMASK_B32::Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cndmask_b32", false) + { + setFlag(ALU); + setFlag(ReadsVCC); + } // Inst_VOP3__V_CNDMASK_B32 + + Inst_VOP3__V_CNDMASK_B32::~Inst_VOP3__V_CNDMASK_B32() + { + } // ~Inst_VOP3__V_CNDMASK_B32 + + // --- description from .arch file --- + // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC + // as a scalar GPR in S2. + void + Inst_VOP3__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + vcc.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = bits(vcc.rawData(), lane) + ? 
src1[lane] : src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_ADD_F32 class methods --- + + Inst_VOP3__V_ADD_F32::Inst_VOP3__V_ADD_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_add_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_ADD_F32 + + Inst_VOP3__V_ADD_F32::~Inst_VOP3__V_ADD_F32() + { + } // ~Inst_VOP3__V_ADD_F32 + + // --- description from .arch file --- + // D.f = S0.f + S1.f. + void + Inst_VOP3__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SUB_F32 class methods --- + + Inst_VOP3__V_SUB_F32::Inst_VOP3__V_SUB_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sub_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_SUB_F32 + + Inst_VOP3__V_SUB_F32::~Inst_VOP3__V_SUB_F32() + { + } // ~Inst_VOP3__V_SUB_F32 + + // --- description from .arch file --- + // D.f = S0.f - S1.f. + // SQ translates to V_ADD_F32. 
+ void + Inst_VOP3__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SUBREV_F32 class methods --- + + Inst_VOP3__V_SUBREV_F32::Inst_VOP3__V_SUBREV_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_subrev_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_SUBREV_F32 + + Inst_VOP3__V_SUBREV_F32::~Inst_VOP3__V_SUBREV_F32() + { + } // ~Inst_VOP3__V_SUBREV_F32 + + // --- description from .arch file --- + // D.f = S1.f - S0.f. + // SQ translates to V_ADD_F32. 
+ void + Inst_VOP3__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] - src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_LEGACY_F32 class methods --- + + Inst_VOP3__V_MUL_LEGACY_F32::Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_legacy_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_MUL_LEGACY_F32 + + Inst_VOP3__V_MUL_LEGACY_F32::~Inst_VOP3__V_MUL_LEGACY_F32() + { + } // ~Inst_VOP3__V_MUL_LEGACY_F32 + + // --- description from .arch file --- + // D.f = S0.f * S1.f (DX9 rules, 0.0*x = 0.0). 
+    void
+    Inst_VOP3__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        if (instData.ABS & 0x1) {
+            src0.absModifier();
+        }
+
+        if (instData.ABS & 0x2) {
+            src1.absModifier();
+        }
+
+        if (extData.NEG & 0x1) {
+            src0.negModifier();
+        }
+
+        if (extData.NEG & 0x2) {
+            src1.negModifier();
+        }
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x4));
+
+        // NaN/zero/Inf combinations are handled explicitly; denormal inputs
+        // are classified together with zero (flushed) in the cases below.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                if (std::isnan(src0[lane]) ||
+                    std::isnan(src1[lane])) {
+                    vdst[lane] = NAN;
+                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
+                           std::fpclassify(src0[lane]) == FP_ZERO) &&
+                           !std::signbit(src0[lane])) {
+                    if (std::isinf(src1[lane])) {
+                        vdst[lane] = NAN;
+                    } else if (!std::signbit(src1[lane])) {
+                        vdst[lane] = +0.0;
+                    } else {
+                        vdst[lane] = -0.0;
+                    }
+                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
+                           std::fpclassify(src0[lane]) == FP_ZERO) &&
+                           std::signbit(src0[lane])) {
+                    if (std::isinf(src1[lane])) {
+                        vdst[lane] = NAN;
+                    } else if (std::signbit(src1[lane])) {
+                        vdst[lane] = +0.0;
+                    } else {
+                        vdst[lane] = -0.0;
+                    }
+                } else if (std::isinf(src0[lane]) &&
+                           !std::signbit(src0[lane])) {
+                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
+                        std::fpclassify(src1[lane]) == FP_ZERO) {
+                        vdst[lane] = NAN;
+                    } else if (!std::signbit(src1[lane])) {
+                        vdst[lane] = +INFINITY;
+                    } else {
+                        vdst[lane] = -INFINITY;
+                    }
+                } else if (std::isinf(src0[lane]) &&
+                           std::signbit(src0[lane])) {
+                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
+                        std::fpclassify(src1[lane]) == FP_ZERO) {
+                        vdst[lane] = NAN;
+                    } else if (std::signbit(src1[lane])) {
+                        vdst[lane] = +INFINITY;
+                    } else {
+                        vdst[lane] = -INFINITY;
+                    }
+                } else {
+                    vdst[lane] = src0[lane] * src1[lane];
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP3__V_MUL_F32 class methods ---
+
+    Inst_VOP3__V_MUL_F32::Inst_VOP3__V_MUL_F32(InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_mul_f32", false)
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP3__V_MUL_F32
+
+    Inst_VOP3__V_MUL_F32::~Inst_VOP3__V_MUL_F32()
+    {
+    } // ~Inst_VOP3__V_MUL_F32
+
+    // --- description from .arch file ---
+    // D.f = S0.f * S1.f.
+    void
+    Inst_VOP3__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        if (instData.ABS & 0x1) {
+            src0.absModifier();
+        }
+
+        if (instData.ABS & 0x2) {
+            src1.absModifier();
+        }
+
+        if (extData.NEG & 0x1) {
+            src0.negModifier();
+        }
+
+        if (extData.NEG & 0x2) {
+            src1.negModifier();
+        }
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x4));
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                if (std::isnan(src0[lane]) ||
+                    std::isnan(src1[lane])) {
+                    vdst[lane] = NAN;
+                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
+                           std::fpclassify(src0[lane]) == FP_ZERO) &&
+                           !std::signbit(src0[lane])) {
+                    if (std::isinf(src1[lane])) {
+                        vdst[lane] = NAN;
+                    } else if (!std::signbit(src1[lane])) {
+                        vdst[lane] = +0.0;
+                    } else {
+                        vdst[lane] = -0.0;
+                    }
+                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
+                           std::fpclassify(src0[lane]) == FP_ZERO) &&
+                           std::signbit(src0[lane])) {
+                    if (std::isinf(src1[lane])) {
+                        vdst[lane] = NAN;
+                    } else if (std::signbit(src1[lane])) {
+                        vdst[lane] = +0.0;
+                    } else {
+                        vdst[lane] = -0.0;
+                    }
+                } else if (std::isinf(src0[lane]) &&
+                           !std::signbit(src0[lane])) {
+                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
+                        std::fpclassify(src1[lane]) == FP_ZERO) {
+                        vdst[lane] = NAN;
+                    } else if (!std::signbit(src1[lane])) {
+                        vdst[lane] = +INFINITY;
+                    } else {
+                        vdst[lane] = -INFINITY;
+                    }
+                } else if (std::isinf(src0[lane]) &&
+                           std::signbit(src0[lane])) {
+                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
+                        std::fpclassify(src1[lane]) == FP_ZERO) {
+                        vdst[lane] = NAN;
+                    } else if (std::signbit(src1[lane])) {
+                        vdst[lane] = +INFINITY;
+                    } else {
+                        vdst[lane] = -INFINITY;
+                    }
+                } else {
+                    vdst[lane] = src0[lane] * src1[lane];
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP3__V_MUL_I32_I24 class methods ---
+
+    Inst_VOP3__V_MUL_I32_I24::Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_mul_i32_i24", false)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_MUL_I32_I24
+
+    Inst_VOP3__V_MUL_I32_I24::~Inst_VOP3__V_MUL_I32_I24()
+    {
+    } // ~Inst_VOP3__V_MUL_I32_I24
+
+    // --- description from .arch file ---
+    // D.i = S0.i[23:0] * S1.i[23:0].
+    void
+    Inst_VOP3__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
+        VecOperandI32 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc(); // was read(); VOP3 SRC1 may name an SGPR/constant
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
+                    * sext<24>(bits(src1[lane], 23, 0));
+            }
+        }
+
+        vdst.write();
+    } // execute
+    // --- Inst_VOP3__V_MUL_HI_I32_I24 class methods ---
+
+    Inst_VOP3__V_MUL_HI_I32_I24::Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_mul_hi_i32_i24", false)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_MUL_HI_I32_I24
+
+
Inst_VOP3__V_MUL_HI_I32_I24::~Inst_VOP3__V_MUL_HI_I32_I24() + { + } // ~Inst_VOP3__V_MUL_HI_I32_I24 + + // --- description from .arch file --- + // D.i = (S0.i[23:0] * S1.i[23:0])>>32. + void + Inst_VOP3__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemI64 tmp_src0 + = (VecElemI64)sext<24>(bits(src0[lane], 23, 0)); + VecElemI64 tmp_src1 + = (VecElemI64)sext<24>(bits(src1[lane], 23, 0)); + + vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_U32_U24 class methods --- + + Inst_VOP3__V_MUL_U32_U24::Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_u32_u24", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MUL_U32_U24 + + Inst_VOP3__V_MUL_U32_U24::~Inst_VOP3__V_MUL_U32_U24() + { + } // ~Inst_VOP3__V_MUL_U32_U24 + + // --- description from .arch file --- + // D.u = S0.u[23:0] * S1.u[23:0]. 
+ void + Inst_VOP3__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_HI_U32_U24 class methods --- + + Inst_VOP3__V_MUL_HI_U32_U24::Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_hi_u32_u24", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MUL_HI_U32_U24 + + Inst_VOP3__V_MUL_HI_U32_U24::~Inst_VOP3__V_MUL_HI_U32_U24() + { + } // ~Inst_VOP3__V_MUL_HI_U32_U24 + + // --- description from .arch file --- + // D.i = (S0.u[23:0] * S1.u[23:0])>>32. 
+ void + Inst_VOP3__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0); + VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0); + vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MIN_F32 class methods --- + + Inst_VOP3__V_MIN_F32::Inst_VOP3__V_MIN_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_min_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_MIN_F32 + + Inst_VOP3__V_MIN_F32::~Inst_VOP3__V_MIN_F32() + { + } // ~Inst_VOP3__V_MIN_F32 + + // --- description from .arch file --- + // D.f = (S0.f < S1.f ? S0.f : S1.f). 
+ void + Inst_VOP3__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fmin(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAX_F32 class methods --- + + Inst_VOP3__V_MAX_F32::Inst_VOP3__V_MAX_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_max_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_MAX_F32 + + Inst_VOP3__V_MAX_F32::~Inst_VOP3__V_MAX_F32() + { + } // ~Inst_VOP3__V_MAX_F32 + + // --- description from .arch file --- + // D.f = (S0.f >= S1.f ? S0.f : S1.f). 
+ void + Inst_VOP3__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fmax(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MIN_I32 class methods --- + + Inst_VOP3__V_MIN_I32::Inst_VOP3__V_MIN_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_min_i32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MIN_I32 + + Inst_VOP3__V_MIN_I32::~Inst_VOP3__V_MIN_I32() + { + } // ~Inst_VOP3__V_MIN_I32 + + // --- description from .arch file --- + // D.i = min(S0.i, S1.i). 
+ void + Inst_VOP3__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::min(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAX_I32 class methods --- + + Inst_VOP3__V_MAX_I32::Inst_VOP3__V_MAX_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_max_i32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MAX_I32 + + Inst_VOP3__V_MAX_I32::~Inst_VOP3__V_MAX_I32() + { + } // ~Inst_VOP3__V_MAX_I32 + + // --- description from .arch file --- + // D.i = max(S0.i, S1.i). 
+ void + Inst_VOP3__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::max(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MIN_U32 class methods --- + + Inst_VOP3__V_MIN_U32::Inst_VOP3__V_MIN_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_min_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MIN_U32 + + Inst_VOP3__V_MIN_U32::~Inst_VOP3__V_MIN_U32() + { + } // ~Inst_VOP3__V_MIN_U32 + + // --- description from .arch file --- + // D.u = min(S0.u, S1.u). 
+ void + Inst_VOP3__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::min(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAX_U32 class methods --- + + Inst_VOP3__V_MAX_U32::Inst_VOP3__V_MAX_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_max_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MAX_U32 + + Inst_VOP3__V_MAX_U32::~Inst_VOP3__V_MAX_U32() + { + } // ~Inst_VOP3__V_MAX_U32 + + // --- description from .arch file --- + // D.u = max(S0.u, S1.u). 
+ void + Inst_VOP3__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::max(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LSHRREV_B32 class methods --- + + Inst_VOP3__V_LSHRREV_B32::Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshrrev_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHRREV_B32 + + Inst_VOP3__V_LSHRREV_B32::~Inst_VOP3__V_LSHRREV_B32() + { + } // ~Inst_VOP3__V_LSHRREV_B32 + + // --- description from .arch file --- + // D.u = S1.u >> S0.u[4:0]. + // The vacated bits are set to zero. + // SQ translates this to an internal SP opcode. 
+ void + Inst_VOP3__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_ASHRREV_I32 class methods --- + + Inst_VOP3__V_ASHRREV_I32::Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ashrrev_i32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_ASHRREV_I32 + + Inst_VOP3__V_ASHRREV_I32::~Inst_VOP3__V_ASHRREV_I32() + { + } // ~Inst_VOP3__V_ASHRREV_I32 + + // --- description from .arch file --- + // D.i = signext(S1.i) >> S0.i[4:0]. + // The vacated bits are set to the sign bit of the input value. + // SQ translates this to an internal SP opcode. 
+ void + Inst_VOP3__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LSHLREV_B32 class methods --- + + Inst_VOP3__V_LSHLREV_B32::Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshlrev_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHLREV_B32 + + Inst_VOP3__V_LSHLREV_B32::~Inst_VOP3__V_LSHLREV_B32() + { + } // ~Inst_VOP3__V_LSHLREV_B32 + + // --- description from .arch file --- + // D.u = S1.u << S0.u[4:0]. + // SQ translates this to an internal SP opcode. 
+ void + Inst_VOP3__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] << bits(src0[lane], 4, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_AND_B32 class methods --- + + Inst_VOP3__V_AND_B32::Inst_VOP3__V_AND_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_and_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_AND_B32 + + Inst_VOP3__V_AND_B32::~Inst_VOP3__V_AND_B32() + { + } // ~Inst_VOP3__V_AND_B32 + + // --- description from .arch file --- + // D.u = S0.u & S1.u. + // Input and output modifiers not supported. 
+ void + Inst_VOP3__V_AND_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] & src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_OR_B32 class methods --- + + Inst_VOP3__V_OR_B32::Inst_VOP3__V_OR_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_or_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_OR_B32 + + Inst_VOP3__V_OR_B32::~Inst_VOP3__V_OR_B32() + { + } // ~Inst_VOP3__V_OR_B32 + + // --- description from .arch file --- + // D.u = S0.u | S1.u. + // Input and output modifiers not supported. 
+ void + Inst_VOP3__V_OR_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] | src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_OR3_B32 class methods --- + + Inst_VOP3__V_OR3_B32::Inst_VOP3__V_OR3_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_or3_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_OR3_B32 + + Inst_VOP3__V_OR3_B32::~Inst_VOP3__V_OR3_B32() + { + } // ~Inst_VOP3__V_OR3_B32 + + // --- description from .arch file --- + // D.u = S0.u | S1.u | S2.u. + // Input and output modifiers not supported. 
+ void + Inst_VOP3__V_OR3_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] | src1[lane] | src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_XOR_B32 class methods --- + + Inst_VOP3__V_XOR_B32::Inst_VOP3__V_XOR_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_xor_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_XOR_B32 + + Inst_VOP3__V_XOR_B32::~Inst_VOP3__V_XOR_B32() + { + } // ~Inst_VOP3__V_XOR_B32 + + // --- description from .arch file --- + // D.u = S0.u ^ S1.u. + // Input and output modifiers not supported. 
+ void + Inst_VOP3__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] ^ src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAC_F32 class methods --- + + Inst_VOP3__V_MAC_F32::Inst_VOP3__V_MAC_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mac_f32", false) + { + setFlag(ALU); + setFlag(F32); + setFlag(MAC); + } // Inst_VOP3__V_MAC_F32 + + Inst_VOP3__V_MAC_F32::~Inst_VOP3__V_MAC_F32() + { + } // ~Inst_VOP3__V_MAC_F32 + + // --- description from .arch file --- + // D.f = S0.f * S1.f + D.f. + // SQ translates to V_MAD_F32. 
    // D.f = S0.f * S1.f + D.f, computed as a fused multiply-add per lane.
    // Note: the destination is also a source (the accumulator), so vdst is
    // read before the loop. ABS/NEG input modifiers are honored for the two
    // FP sources; the third modifier bit must be clear (no third source).
    void
    Inst_VOP3__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        // MAC accumulates into the destination, so its prior value is read.
        vdst.read();

        // Apply |S0|, |S1| before negation (abs first, then neg, per the
        // VOP3 input-modifier ordering used throughout this file).
        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // std::fma gives a single-rounding multiply-add.
                vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_ADD_CO_U32 class methods ---

    Inst_VOP3__V_ADD_CO_U32::Inst_VOP3__V_ADD_CO_U32(InFmt_VOP3B *iFmt)
        : Inst_VOP3B(iFmt, "v_add_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP3__V_ADD_CO_U32

    Inst_VOP3__V_ADD_CO_U32::~Inst_VOP3__V_ADD_CO_U32()
    {
    } // ~Inst_VOP3__V_ADD_CO_U32

    // --- description from .arch file ---
    // D.u = S0.u + S1.u;
    // VCC[threadId] = (S0.u + S1.u >= 0x800000000ULL ? 1 : 0) is an UNSIGNED
    // --- overflow or carry-out for V_ADDC_U32.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    // D.u = S0.u + S1.u with per-lane carry-out. The carry-out destination
    // is the VOP3B SDST field (an arbitrary SGPR pair, not necessarily the
    // architectural VCC).
    void
    Inst_VOP3__V_ADD_CO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         * (VOP3B has no ABS field, so only NEG is checked here)
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane];
                // Carry-out: 1 if the widened 64-bit sum overflows 32 bits.
                vcc.setBit(lane, ((VecElemU64)src0[lane]
                    + (VecElemU64)src1[lane]) >= 0x100000000ULL ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    // --- Inst_VOP3__V_SUB_CO_U32 class methods ---

    Inst_VOP3__V_SUB_CO_U32::Inst_VOP3__V_SUB_CO_U32(InFmt_VOP3B *iFmt)
        : Inst_VOP3B(iFmt, "v_sub_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP3__V_SUB_CO_U32

    Inst_VOP3__V_SUB_CO_U32::~Inst_VOP3__V_SUB_CO_U32()
    {
    } // ~Inst_VOP3__V_SUB_CO_U32

    // --- description from .arch file ---
    // D.u = S0.u - S1.u;
    // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out for V_SUBB_U32.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    // D.u = S0.u - S1.u with per-lane borrow-out written to the VOP3B SDST
    // SGPR pair.
    void
    Inst_VOP3__V_SUB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
                // Borrow-out: 1 when the (wrapping) subtraction underflows.
                vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    // --- Inst_VOP3__V_SUBREV_CO_U32 class methods ---

    Inst_VOP3__V_SUBREV_CO_U32::Inst_VOP3__V_SUBREV_CO_U32(
          InFmt_VOP3B *iFmt)
        : Inst_VOP3B(iFmt, "v_subrev_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP3__V_SUBREV_CO_U32

    Inst_VOP3__V_SUBREV_CO_U32::~Inst_VOP3__V_SUBREV_CO_U32()
    {
    } // ~Inst_VOP3__V_SUBREV_CO_U32

    // --- description from .arch file ---
    // D.u = S1.u - S0.u;
    // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out for V_SUBB_U32.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    // SQ translates this to V_SUB_U32 with reversed operands.
    // D.u = S1.u - S0.u (reversed-operand subtract) with per-lane
    // borrow-out written to the VOP3B SDST SGPR pair.
    void
    Inst_VOP3__V_SUBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Reversed relative to V_SUB_CO_U32: minuend is src1.
                vdst[lane] = src1[lane] - src0[lane];
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    // --- Inst_VOP3__V_ADDC_CO_U32 class methods ---

    Inst_VOP3__V_ADDC_CO_U32::Inst_VOP3__V_ADDC_CO_U32(InFmt_VOP3B *iFmt)
        : Inst_VOP3B(iFmt, "v_addc_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP3__V_ADDC_CO_U32

    Inst_VOP3__V_ADDC_CO_U32::~Inst_VOP3__V_ADDC_CO_U32()
    {
    } // ~Inst_VOP3__V_ADDC_CO_U32

    // --- description from .arch file ---
    // D.u = S0.u + S1.u + VCC[threadId];
    // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x800000000ULL ? 1 : 0)
    // is an UNSIGNED overflow.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
+ void + Inst_VOP3__V_ADDC_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + ScalarOperandU64 sdst(gpuDynInst, instData.SDST); + + src0.readSrc(); + src1.readSrc(); + vcc.read(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane] + + bits(vcc.rawData(), lane); + sdst.setBit(lane, ((VecElemU64)src0[lane] + + (VecElemU64)src1[lane] + + (VecElemU64)bits(vcc.rawData(), lane)) + >= 0x100000000 ? 1 : 0); + } + } + + vdst.write(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_SUBB_CO_U32 class methods --- + + Inst_VOP3__V_SUBB_CO_U32::Inst_VOP3__V_SUBB_CO_U32(InFmt_VOP3B *iFmt) + : Inst_VOP3B(iFmt, "v_subb_co_u32") + { + setFlag(ALU); + setFlag(WritesVCC); + setFlag(ReadsVCC); + } // Inst_VOP3__V_SUBB_CO_U32 + + Inst_VOP3__V_SUBB_CO_U32::~Inst_VOP3__V_SUBB_CO_U32() + { + } // ~Inst_VOP3__V_SUBB_CO_U32 + + // --- description from .arch file --- + // D.u = S0.u - S1.u - VCC[threadId]; + // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED + // --- overflow. + // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC + // --- source comes from the SGPR-pair at S2.u. 
    // D.u = S0.u - S1.u - carry-in, with per-lane borrow-out. Carry-in is
    // read from the SGPR pair at SRC2; borrow-out is written to SDST.
    void
    Inst_VOP3__V_SUBB_CO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        vcc.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane]
                    - bits(vcc.rawData(), lane);
                // Borrow-out: 1 when subtrahend + carry-in exceeds the
                // minuend.
                sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
                    > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        sdst.write();
    } // execute
    // --- Inst_VOP3__V_SUBBREV_CO_U32 class methods ---

    Inst_VOP3__V_SUBBREV_CO_U32::Inst_VOP3__V_SUBBREV_CO_U32(
          InFmt_VOP3B *iFmt)
        : Inst_VOP3B(iFmt, "v_subbrev_co_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP3__V_SUBBREV_CO_U32

    Inst_VOP3__V_SUBBREV_CO_U32::~Inst_VOP3__V_SUBBREV_CO_U32()
    {
    } // ~Inst_VOP3__V_SUBBREV_CO_U32

    // --- description from .arch file ---
    // D.u = S1.u - S0.u - VCC[threadId];
    // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
    // overflow.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u. SQ translates to V_SUBB_U32.
+ void + Inst_VOP3__V_SUBBREV_CO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstScalarOperandU64 sdst(gpuDynInst, instData.SDST); + ScalarOperandU64 vcc(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + vcc.read(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] - src0[lane] + - bits(vcc.rawData(), lane); + sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane)) + > src0[lane] ? 1 : 0); + } + } + + vdst.write(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_ADD_F16 class methods --- + + Inst_VOP3__V_ADD_F16::Inst_VOP3__V_ADD_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_add_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_ADD_F16 + + Inst_VOP3__V_ADD_F16::~Inst_VOP3__V_ADD_F16() + { + } // ~Inst_VOP3__V_ADD_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 + S1.f16. + // Supports denormals, round mode, exception flags, saturation. + void + Inst_VOP3__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_SUB_F16 class methods --- + + Inst_VOP3__V_SUB_F16::Inst_VOP3__V_SUB_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sub_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_SUB_F16 + + Inst_VOP3__V_SUB_F16::~Inst_VOP3__V_SUB_F16() + { + } // ~Inst_VOP3__V_SUB_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 - S1.f16. + // Supports denormals, round mode, exception flags, saturation. + // SQ translates to V_ADD_F16. 
    void
    Inst_VOP3__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        // F16 arithmetic is not modeled; aborts the simulation if decoded.
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_SUBREV_F16 class methods ---

    Inst_VOP3__V_SUBREV_F16::Inst_VOP3__V_SUBREV_F16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_subrev_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_SUBREV_F16

    Inst_VOP3__V_SUBREV_F16::~Inst_VOP3__V_SUBREV_F16()
    {
    } // ~Inst_VOP3__V_SUBREV_F16

    // --- description from .arch file ---
    // D.f16 = S1.f16 - S0.f16.
    // Supports denormals, round mode, exception flags, saturation.
    // SQ translates to V_ADD_F16.
    void
    Inst_VOP3__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        // F16 arithmetic is not modeled; aborts the simulation if decoded.
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_MUL_F16 class methods ---

    Inst_VOP3__V_MUL_F16::Inst_VOP3__V_MUL_F16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_mul_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_MUL_F16

    Inst_VOP3__V_MUL_F16::~Inst_VOP3__V_MUL_F16()
    {
    } // ~Inst_VOP3__V_MUL_F16

    // --- description from .arch file ---
    // D.f16 = S0.f16 * S1.f16.
    // Supports denormals, round mode, exception flags, saturation.
    void
    Inst_VOP3__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        // F16 arithmetic is not modeled; aborts the simulation if decoded.
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_MAC_F16 class methods ---

    Inst_VOP3__V_MAC_F16::Inst_VOP3__V_MAC_F16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_mac_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAC);
    } // Inst_VOP3__V_MAC_F16

    Inst_VOP3__V_MAC_F16::~Inst_VOP3__V_MAC_F16()
    {
    } // ~Inst_VOP3__V_MAC_F16

    // --- description from .arch file ---
    // D.f16 = S0.f16 * S1.f16 + D.f16.
    // Supports round mode, exception flags, saturation.
    // SQ translates this to V_MAD_F16.
+ void + Inst_VOP3__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_ADD_U16 class methods --- + + Inst_VOP3__V_ADD_U16::Inst_VOP3__V_ADD_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_add_u16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_ADD_U16 + + Inst_VOP3__V_ADD_U16::~Inst_VOP3__V_ADD_U16() + { + } // ~Inst_VOP3__V_ADD_U16 + + // --- description from .arch file --- + // D.u16 = S0.u16 + S1.u16. + // Supports saturation (unsigned 16-bit integer domain). + void + Inst_VOP3__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SUB_U16 class methods --- + + Inst_VOP3__V_SUB_U16::Inst_VOP3__V_SUB_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sub_u16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_SUB_U16 + + Inst_VOP3__V_SUB_U16::~Inst_VOP3__V_SUB_U16() + { + } // ~Inst_VOP3__V_SUB_U16 + + // --- description from .arch file --- + // D.u16 = S0.u16 - S1.u16. + // Supports saturation (unsigned 16-bit integer domain). 
+ void + Inst_VOP3__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SUBREV_U16 class methods --- + + Inst_VOP3__V_SUBREV_U16::Inst_VOP3__V_SUBREV_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_subrev_u16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_SUBREV_U16 + + Inst_VOP3__V_SUBREV_U16::~Inst_VOP3__V_SUBREV_U16() + { + } // ~Inst_VOP3__V_SUBREV_U16 + + // --- description from .arch file --- + // D.u16 = S1.u16 - S0.u16. + // Supports saturation (unsigned 16-bit integer domain). + // SQ translates this to V_SUB_U16 with reversed operands. 
+ void + Inst_VOP3__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] - src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_LO_U16 class methods --- + + Inst_VOP3__V_MUL_LO_U16::Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_lo_u16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MUL_LO_U16 + + Inst_VOP3__V_MUL_LO_U16::~Inst_VOP3__V_MUL_LO_U16() + { + } // ~Inst_VOP3__V_MUL_LO_U16 + + // --- description from .arch file --- + // D.u16 = S0.u16 * S1.u16. + // Supports saturation (unsigned 16-bit integer domain). 
+ void + Inst_VOP3__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] * src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LSHLREV_B16 class methods --- + + Inst_VOP3__V_LSHLREV_B16::Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshlrev_b16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHLREV_B16 + + Inst_VOP3__V_LSHLREV_B16::~Inst_VOP3__V_LSHLREV_B16() + { + } // ~Inst_VOP3__V_LSHLREV_B16 + + // --- description from .arch file --- + // D.u[15:0] = S1.u[15:0] << S0.u[3:0]. + // SQ translates this to an internal SP opcode. 
+ void + Inst_VOP3__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] << bits(src0[lane], 3, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LSHRREV_B16 class methods --- + + Inst_VOP3__V_LSHRREV_B16::Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshrrev_b16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHRREV_B16 + + Inst_VOP3__V_LSHRREV_B16::~Inst_VOP3__V_LSHRREV_B16() + { + } // ~Inst_VOP3__V_LSHRREV_B16 + + // --- description from .arch file --- + // D.u[15:0] = S1.u[15:0] >> S0.u[3:0]. + // The vacated bits are set to zero. + // SQ translates this to an internal SP opcode. 
    // D.u[15:0] = S1.u[15:0] >> S0.u[3:0] (logical shift, zero fill).
    //
    // NOTE(review): unlike the sibling integer shifts (e.g. V_LSHLREV_B16),
    // which assert the ABS/NEG input modifiers are unset, this one applies
    // abs/neg modifiers to *unsigned 16-bit* operands. That looks like a
    // copy-paste from an FP op — confirm against the upstream reference
    // before changing, since modifiers on integer sources should normally
    // be rejected. Behavior is left exactly as written.
    void
    Inst_VOP3__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_ASHRREV_I16 class methods ---

    Inst_VOP3__V_ASHRREV_I16::Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_ashrrev_i16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ASHRREV_I16

    Inst_VOP3__V_ASHRREV_I16::~Inst_VOP3__V_ASHRREV_I16()
    {
    } // ~Inst_VOP3__V_ASHRREV_I16

    // --- description from .arch file ---
    // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
    // The vacated bits are set to the sign bit of the input value.
    // SQ translates this to an internal SP opcode.
+ void + Inst_VOP3__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + VecOperandI16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAX_F16 class methods --- + + Inst_VOP3__V_MAX_F16::Inst_VOP3__V_MAX_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_max_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_MAX_F16 + + Inst_VOP3__V_MAX_F16::~Inst_VOP3__V_MAX_F16() + { + } // ~Inst_VOP3__V_MAX_F16 + + // --- description from .arch file --- + // D.f16 = max(S0.f16, S1.f16). + // IEEE compliant. Supports denormals, round mode, exception flags, + // saturation. + void + Inst_VOP3__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_MIN_F16 class methods --- + + Inst_VOP3__V_MIN_F16::Inst_VOP3__V_MIN_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_min_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_MIN_F16 + + Inst_VOP3__V_MIN_F16::~Inst_VOP3__V_MIN_F16() + { + } // ~Inst_VOP3__V_MIN_F16 + + // --- description from .arch file --- + // D.f16 = min(S0.f16, S1.f16). + // IEEE compliant. Supports denormals, round mode, exception flags, + // saturation. 
    void
    Inst_VOP3__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        // F16 arithmetic is not modeled; aborts the simulation if decoded.
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_MAX_U16 class methods ---

    Inst_VOP3__V_MAX_U16::Inst_VOP3__V_MAX_U16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_max_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX_U16

    Inst_VOP3__V_MAX_U16::~Inst_VOP3__V_MAX_U16()
    {
    } // ~Inst_VOP3__V_MAX_U16

    // --- description from .arch file ---
    // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
    //
    // NOTE(review): applies abs/neg modifiers to unsigned 16-bit sources,
    // unlike sibling integer ops which assert the modifiers are unset —
    // looks like an FP copy-paste; confirm against the upstream reference
    // before changing. Behavior is left exactly as written.
    void
    Inst_VOP3__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MAX_I16 class methods ---

    Inst_VOP3__V_MAX_I16::Inst_VOP3__V_MAX_I16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_max_i16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX_I16

    Inst_VOP3__V_MAX_I16::~Inst_VOP3__V_MAX_I16()
    {
    } // ~Inst_VOP3__V_MAX_I16

    // --- description from .arch file ---
    // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
    // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]), per active lane.
    // NOTE(review): honors abs/neg input modifiers on integer sources —
    // confirm against the upstream reference; left exactly as written.
    void
    Inst_VOP3__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MIN_U16 class methods ---

    Inst_VOP3__V_MIN_U16::Inst_VOP3__V_MIN_U16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_min_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN_U16

    Inst_VOP3__V_MIN_U16::~Inst_VOP3__V_MIN_U16()
    {
    } // ~Inst_VOP3__V_MIN_U16

    // --- description from .arch file ---
    // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
    // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]), per active lane.
    // NOTE(review): honors abs/neg input modifiers on unsigned sources —
    // confirm against the upstream reference; left exactly as written.
    void
    Inst_VOP3__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MIN_I16 class methods ---

    Inst_VOP3__V_MIN_I16::Inst_VOP3__V_MIN_I16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_min_i16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN_I16

    Inst_VOP3__V_MIN_I16::~Inst_VOP3__V_MIN_I16()
    {
    } // ~Inst_VOP3__V_MIN_I16

    // --- description from .arch file ---
    // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
    // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]), per active lane.
    // NOTE(review): honors abs/neg input modifiers on integer sources —
    // confirm against the upstream reference; left exactly as written.
    void
    Inst_VOP3__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_LDEXP_F16 class methods ---

    Inst_VOP3__V_LDEXP_F16::Inst_VOP3__V_LDEXP_F16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_ldexp_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_LDEXP_F16

    Inst_VOP3__V_LDEXP_F16::~Inst_VOP3__V_LDEXP_F16()
    {
    } // ~Inst_VOP3__V_LDEXP_F16

    // --- description from .arch file ---
    // D.f16 = S0.f16 * (2 ** S1.i16).
    void
    Inst_VOP3__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        // F16 arithmetic is not modeled; aborts the simulation if decoded.
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_ADD_U32 class methods ---

    Inst_VOP3__V_ADD_U32::Inst_VOP3__V_ADD_U32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_add_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ADD_U32

    Inst_VOP3__V_ADD_U32::~Inst_VOP3__V_ADD_U32()
    {
    } // ~Inst_VOP3__V_ADD_U32

    // --- description from .arch file ---
    // D.u32 = S0.u32 + S1.u32.
+ void + Inst_VOP3__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SUB_U32 class methods --- + + Inst_VOP3__V_SUB_U32::Inst_VOP3__V_SUB_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sub_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_SUB_U32 + + Inst_VOP3__V_SUB_U32::~Inst_VOP3__V_SUB_U32() + { + } // ~Inst_VOP3__V_SUB_U32 + + // --- description from .arch file --- + // D.u32 = S0.u32 - S1.u32. 
+ void + Inst_VOP3__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] - src1[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SUBREV_U32 class methods --- + + Inst_VOP3__V_SUBREV_U32::Inst_VOP3__V_SUBREV_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_subrev_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_SUBREV_U32 + + Inst_VOP3__V_SUBREV_U32::~Inst_VOP3__V_SUBREV_U32() + { + } // ~Inst_VOP3__V_SUBREV_U32 + + // --- description from .arch file --- + // D.u32 = S1.u32 - S0.u32. 
+ void + Inst_VOP3__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] - src0[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FMAC_F32 class methods --- + + Inst_VOP3__V_FMAC_F32::Inst_VOP3__V_FMAC_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_fmac_f32", false) + { + setFlag(ALU); + setFlag(F32); + setFlag(FMA); + } // Inst_VOP3__V_FMAC_F32 + + Inst_VOP3__V_FMAC_F32::~Inst_VOP3__V_FMAC_F32() + { + } // ~Inst_VOP3__V_FMAC_F32 + + // --- description from .arch file --- + // D.f = S0.f * S1.f + D.f. 
+ void + Inst_VOP3__V_FMAC_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + vdst.read(); + + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + panic_if(instData.OPSEL, "OPSEL not implemented for %s", _opcode); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (instData.ABS & 0x4) { + vdst.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + if (extData.NEG & 0x4) { + vdst.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + float out = std::fma(src0[lane], src1[lane], vdst[lane]); + out = omodModifier(out, extData.OMOD); + out = std::clamp(vdst[lane], 0.0f, 1.0f); + vdst[lane] = out; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_NOP class methods --- + + Inst_VOP3__V_NOP::Inst_VOP3__V_NOP(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_nop", false) + { + setFlag(Nop); + setFlag(ALU); + } // Inst_VOP3__V_NOP + + Inst_VOP3__V_NOP::~Inst_VOP3__V_NOP() + { + } // ~Inst_VOP3__V_NOP + + // --- description from .arch file --- + // Do nothing. + void + Inst_VOP3__V_NOP::execute(GPUDynInstPtr gpuDynInst) + { + } // execute + // --- Inst_VOP3__V_MOV_B32 class methods --- + + Inst_VOP3__V_MOV_B32::Inst_VOP3__V_MOV_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mov_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MOV_B32 + + Inst_VOP3__V_MOV_B32::~Inst_VOP3__V_MOV_B32() + { + } // ~Inst_VOP3__V_MOV_B32 + + // --- description from .arch file --- + // D.u = S0.u. + // Input and output modifiers not supported; this is an untyped operation. 
+ void + Inst_VOP3__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_I32_F64 class methods --- + + Inst_VOP3__V_CVT_I32_F64::Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_i32_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CVT_I32_F64 + + Inst_VOP3__V_CVT_I32_F64::~Inst_VOP3__V_CVT_I32_F64() + { + } // ~Inst_VOP3__V_CVT_I32_F64 + + // --- description from .arch file --- + // D.i = (int)S0.d. + // Out-of-range floating point values (including infinity) saturate. NaN is + // --- converted to 0. + void + Inst_VOP3__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int exp; + std::frexp(src[lane],&exp); + if (std::isnan(src[lane])) { + vdst[lane] = 0; + } else if (std::isinf(src[lane]) || exp > 30) { + if (std::signbit(src[lane])) { + vdst[lane] = INT_MIN; + } else { + vdst[lane] = INT_MAX; + } + } else { + vdst[lane] = (VecElemI32)src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F64_I32 class methods --- + + Inst_VOP3__V_CVT_F64_I32::Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f64_i32", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CVT_F64_I32 + + Inst_VOP3__V_CVT_F64_I32::~Inst_VOP3__V_CVT_F64_I32() + { + } // ~Inst_VOP3__V_CVT_F64_I32 + + // --- 
description from .arch file --- + // D.d = (double)S0.i. + void + Inst_VOP3__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF64)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F32_I32 class methods --- + + Inst_VOP3__V_CVT_F32_I32::Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_i32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F32_I32 + + Inst_VOP3__V_CVT_F32_I32::~Inst_VOP3__V_CVT_F32_I32() + { + } // ~Inst_VOP3__V_CVT_F32_I32 + + // --- description from .arch file --- + // D.f = (float)S0.i. + void + Inst_VOP3__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + VecOperandI32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F32_U32 class methods --- + + Inst_VOP3__V_CVT_F32_U32::Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_u32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F32_U32 + + Inst_VOP3__V_CVT_F32_U32::~Inst_VOP3__V_CVT_F32_U32() + { + } // ~Inst_VOP3__V_CVT_F32_U32 + + // --- description from 
.arch file --- + // D.f = (float)S0.u. + void + Inst_VOP3__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_U32_F32 class methods --- + + Inst_VOP3__V_CVT_U32_F32::Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_u32_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_U32_F32 + + Inst_VOP3__V_CVT_U32_F32::~Inst_VOP3__V_CVT_U32_F32() + { + } // ~Inst_VOP3__V_CVT_U32_F32 + + // --- description from .arch file --- + // D.u = (unsigned)S0.f. + // Out-of-range floating point values (including infinity) saturate. NaN is + // --- converted to 0. 
+ void + Inst_VOP3__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int exp; + std::frexp(src[lane],&exp); + if (std::isnan(src[lane])) { + vdst[lane] = 0; + } else if (std::isinf(src[lane])) { + if (std::signbit(src[lane])) { + vdst[lane] = 0; + } else { + vdst[lane] = UINT_MAX; + } + } else if (exp > 31) { + vdst[lane] = UINT_MAX; + } else { + vdst[lane] = (VecElemU32)src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_I32_F32 class methods --- + + Inst_VOP3__V_CVT_I32_F32::Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_i32_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_I32_F32 + + Inst_VOP3__V_CVT_I32_F32::~Inst_VOP3__V_CVT_I32_F32() + { + } // ~Inst_VOP3__V_CVT_I32_F32 + + // --- description from .arch file --- + // D.i = (int)S0.f. + // Out-of-range floating point values (including infinity) saturate. NaN is + // --- converted to 0. 
+ void + Inst_VOP3__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int exp; + std::frexp(src[lane],&exp); + if (std::isnan(src[lane])) { + vdst[lane] = 0; + } else if (std::isinf(src[lane]) || exp > 30) { + if (std::signbit(src[lane])) { + vdst[lane] = INT_MIN; + } else { + vdst[lane] = INT_MAX; + } + } else { + vdst[lane] = (VecElemI32)src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MOV_FED_B32 class methods --- + + Inst_VOP3__V_MOV_FED_B32::Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mov_fed_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MOV_FED_B32 + + Inst_VOP3__V_MOV_FED_B32::~Inst_VOP3__V_MOV_FED_B32() + { + } // ~Inst_VOP3__V_MOV_FED_B32 + + // --- description from .arch file --- + // D.u = S0.u; + // Introduce EDC double error upon write to dest vgpr without causing an + // --- exception. + // Input and output modifiers not supported; this is an untyped operation. 
+ void + Inst_VOP3__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_F16_F32 class methods --- + + Inst_VOP3__V_CVT_F16_F32::Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f16_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F16_F32 + + Inst_VOP3__V_CVT_F16_F32::~Inst_VOP3__V_CVT_F16_F32() + { + } // ~Inst_VOP3__V_CVT_F16_F32 + + // --- description from .arch file --- + // D.f16 = flt32_to_flt16(S0.f). + // Supports input modifiers and creates FP16 denormals when appropriate. + void + Inst_VOP3__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + vdst.read(); + + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + + unsigned abs = instData.ABS; + unsigned neg = extData.NEG; + int opsel = instData.OPSEL; + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + float tmp = src0[lane]; + + if ((abs & 1) && (tmp < 0)) tmp = -tmp; + if (neg & 1) tmp = -tmp; + + tmp = omodModifier(tmp, extData.OMOD); + tmp = std::clamp(tmp, 0.0f, 1.0f); + + AMDGPU::mxfloat16 out(tmp); + + // If opsel[3] use upper 16-bits of dest, otherwise lower. 
+ if (opsel & 8) { + replaceBits(vdst[lane], 31, 16, (out.data >> 16)); + } else { + replaceBits(vdst[lane], 15, 0, (out.data >> 16)); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F32_F16 class methods --- + + Inst_VOP3__V_CVT_F32_F16::Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_f16", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F32_F16 + + Inst_VOP3__V_CVT_F32_F16::~Inst_VOP3__V_CVT_F32_F16() + { + } // ~Inst_VOP3__V_CVT_F32_F16 + + // --- description from .arch file --- + // D.f = flt16_to_flt32(S0.f16). + // FP16 denormal inputs are always accepted. + void + Inst_VOP3__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + + panic_if(isSDWAInst(), "SDWA not implemented for %s", _opcode); + panic_if(isDPPInst(), "DPP not implemented for %s", _opcode); + panic_if(instData.OPSEL, "OPSEL not implemented for %s", _opcode); + + unsigned abs = instData.ABS; + unsigned neg = extData.NEG; + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + AMDGPU::mxfloat16 tmp(src0[lane]); + + if ((abs & 1) && (tmp < 0)) tmp = -tmp; + if (neg & 1) tmp = -tmp; + + float out = omodModifier(float(tmp), extData.OMOD); + out = std::clamp(out, 0.0f, 1.0f); + + vdst[lane] = out; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_RPI_I32_F32 class methods --- + + Inst_VOP3__V_CVT_RPI_I32_F32::Inst_VOP3__V_CVT_RPI_I32_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_rpi_i32_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_RPI_I32_F32 + + Inst_VOP3__V_CVT_RPI_I32_F32::~Inst_VOP3__V_CVT_RPI_I32_F32() + { + } // ~Inst_VOP3__V_CVT_RPI_I32_F32 + + // --- description from .arch file --- + // D.i = (int)floor(S0.f + 0.5). 
+ void + Inst_VOP3__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_FLR_I32_F32 class methods --- + + Inst_VOP3__V_CVT_FLR_I32_F32::Inst_VOP3__V_CVT_FLR_I32_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_flr_i32_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_FLR_I32_F32 + + Inst_VOP3__V_CVT_FLR_I32_F32::~Inst_VOP3__V_CVT_FLR_I32_F32() + { + } // ~Inst_VOP3__V_CVT_FLR_I32_F32 + + // --- description from .arch file --- + // D.i = (int)floor(S0.f). + void + Inst_VOP3__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemI32)std::floor(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_OFF_F32_I4 class methods --- + + Inst_VOP3__V_CVT_OFF_F32_I4::Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_off_f32_i4", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_OFF_F32_I4 + + Inst_VOP3__V_CVT_OFF_F32_I4::~Inst_VOP3__V_CVT_OFF_F32_I4() + { + } // ~Inst_VOP3__V_CVT_OFF_F32_I4 + + // --- description from .arch file --- + // 4-bit signed int to 32-bit float. Used for interpolation in shader. 
+ void + Inst_VOP3__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst) + { + // Could not parse sq_uc.arch desc field + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_F32_F64 class methods --- + + Inst_VOP3__V_CVT_F32_F64::Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CVT_F32_F64 + + Inst_VOP3__V_CVT_F32_F64::~Inst_VOP3__V_CVT_F32_F64() + { + } // ~Inst_VOP3__V_CVT_F32_F64 + + // --- description from .arch file --- + // D.f = (float)S0.d. + void + Inst_VOP3__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F64_F32 class methods --- + + Inst_VOP3__V_CVT_F64_F32::Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f64_f32", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CVT_F64_F32 + + Inst_VOP3__V_CVT_F64_F32::~Inst_VOP3__V_CVT_F64_F32() + { + } // ~Inst_VOP3__V_CVT_F64_F32 + + // --- description from .arch file --- + // D.d = (double)S0.f. 
+ void + Inst_VOP3__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF64)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F32_UBYTE0 class methods --- + + Inst_VOP3__V_CVT_F32_UBYTE0::Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte0", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F32_UBYTE0 + + Inst_VOP3__V_CVT_F32_UBYTE0::~Inst_VOP3__V_CVT_F32_UBYTE0() + { + } // ~Inst_VOP3__V_CVT_F32_UBYTE0 + + // --- description from .arch file --- + // D.f = (float)(S0.u[7:0]). 
+ void + Inst_VOP3__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)bits(src[lane], 7, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F32_UBYTE1 class methods --- + + Inst_VOP3__V_CVT_F32_UBYTE1::Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte1", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F32_UBYTE1 + + Inst_VOP3__V_CVT_F32_UBYTE1::~Inst_VOP3__V_CVT_F32_UBYTE1() + { + } // ~Inst_VOP3__V_CVT_F32_UBYTE1 + + // --- description from .arch file --- + // D.f = (float)(S0.u[15:8]). + void + Inst_VOP3__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)bits(src[lane], 15, 8); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F32_UBYTE2 class methods --- + + Inst_VOP3__V_CVT_F32_UBYTE2::Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte2", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F32_UBYTE2 + + Inst_VOP3__V_CVT_F32_UBYTE2::~Inst_VOP3__V_CVT_F32_UBYTE2() + { + } // ~Inst_VOP3__V_CVT_F32_UBYTE2 + + // --- description from .arch file --- + // D.f = (float)(S0.u[23:16]). 
+ void + Inst_VOP3__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)bits(src[lane], 23, 16); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F32_UBYTE3 class methods --- + + Inst_VOP3__V_CVT_F32_UBYTE3::Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f32_ubyte3", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_F32_UBYTE3 + + Inst_VOP3__V_CVT_F32_UBYTE3::~Inst_VOP3__V_CVT_F32_UBYTE3() + { + } // ~Inst_VOP3__V_CVT_F32_UBYTE3 + + // --- description from .arch file --- + // D.f = (float)(S0.u[31:24]). + void + Inst_VOP3__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF32)bits(src[lane], 31, 24); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_U32_F64 class methods --- + + Inst_VOP3__V_CVT_U32_F64::Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_u32_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CVT_U32_F64 + + Inst_VOP3__V_CVT_U32_F64::~Inst_VOP3__V_CVT_U32_F64() + { + } // ~Inst_VOP3__V_CVT_U32_F64 + + // --- description from .arch file --- + // D.u = (unsigned)S0.d. + // Out-of-range floating point values (including infinity) saturate. 
NaN is + // --- converted to 0. + void + Inst_VOP3__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int exp; + std::frexp(src[lane],&exp); + if (std::isnan(src[lane])) { + vdst[lane] = 0; + } else if (std::isinf(src[lane])) { + if (std::signbit(src[lane])) { + vdst[lane] = 0; + } else { + vdst[lane] = UINT_MAX; + } + } else if (exp > 31) { + vdst[lane] = UINT_MAX; + } else { + vdst[lane] = (VecElemU32)src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_F64_U32 class methods --- + + Inst_VOP3__V_CVT_F64_U32::Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f64_u32", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CVT_F64_U32 + + Inst_VOP3__V_CVT_F64_U32::~Inst_VOP3__V_CVT_F64_U32() + { + } // ~Inst_VOP3__V_CVT_F64_U32 + + // --- description from .arch file --- + // D.d = (double)S0.u. 
+ void + Inst_VOP3__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (VecElemF64)src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_TRUNC_F64 class methods --- + + Inst_VOP3__V_TRUNC_F64::Inst_VOP3__V_TRUNC_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_trunc_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_TRUNC_F64 + + Inst_VOP3__V_TRUNC_F64::~Inst_VOP3__V_TRUNC_F64() + { + } // ~Inst_VOP3__V_TRUNC_F64 + + // --- description from .arch file --- + // D.d = trunc(S0.d), return integer part of S0.d. + void + Inst_VOP3__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::trunc(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CEIL_F64 class methods --- + + Inst_VOP3__V_CEIL_F64::Inst_VOP3__V_CEIL_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ceil_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CEIL_F64 + + Inst_VOP3__V_CEIL_F64::~Inst_VOP3__V_CEIL_F64() + { + } // ~Inst_VOP3__V_CEIL_F64 + + // --- description from .arch file --- + // D.d = trunc(S0.d); + // if (S0.d > 0.0 && S0.d != D.d) then D.d += 1.0. 
+ void + Inst_VOP3__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::ceil(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_RNDNE_F64 class methods --- + + Inst_VOP3__V_RNDNE_F64::Inst_VOP3__V_RNDNE_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rndne_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_RNDNE_F64 + + Inst_VOP3__V_RNDNE_F64::~Inst_VOP3__V_RNDNE_F64() + { + } // ~Inst_VOP3__V_RNDNE_F64 + + // --- description from .arch file --- + // D.d = round_nearest_even(S0.d). + void + Inst_VOP3__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = roundNearestEven(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FLOOR_F64 class methods --- + + Inst_VOP3__V_FLOOR_F64::Inst_VOP3__V_FLOOR_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_floor_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_FLOOR_F64 + + Inst_VOP3__V_FLOOR_F64::~Inst_VOP3__V_FLOOR_F64() + { + } // ~Inst_VOP3__V_FLOOR_F64 + + // --- description from .arch file --- + // D.d = trunc(S0.d); + // if (S0.d < 0.0 && S0.d != D.d) then D.d += -1.0. 
+ void + Inst_VOP3__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::floor(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FRACT_F32 class methods --- + + Inst_VOP3__V_FRACT_F32::Inst_VOP3__V_FRACT_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_fract_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_FRACT_F32 + + Inst_VOP3__V_FRACT_F32::~Inst_VOP3__V_FRACT_F32() + { + } // ~Inst_VOP3__V_FRACT_F32 + + // --- description from .arch file --- + // D.f = S0.f - floor(S0.f). + void + Inst_VOP3__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemF32 int_part(0.0); + vdst[lane] = std::modf(src[lane], &int_part); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_TRUNC_F32 class methods --- + + Inst_VOP3__V_TRUNC_F32::Inst_VOP3__V_TRUNC_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_trunc_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_TRUNC_F32 + + Inst_VOP3__V_TRUNC_F32::~Inst_VOP3__V_TRUNC_F32() + { + } // ~Inst_VOP3__V_TRUNC_F32 + + // --- description from .arch file --- + // D.f = trunc(S0.f), return integer part of S0.f. 
+ void + Inst_VOP3__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::trunc(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CEIL_F32 class methods --- + + Inst_VOP3__V_CEIL_F32::Inst_VOP3__V_CEIL_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ceil_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CEIL_F32 + + Inst_VOP3__V_CEIL_F32::~Inst_VOP3__V_CEIL_F32() + { + } // ~Inst_VOP3__V_CEIL_F32 + + // --- description from .arch file --- + // D.f = trunc(S0.f); + // if (S0.f > 0.0 && S0.f != D.f) then D.f += 1.0. + void + Inst_VOP3__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::ceil(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_RNDNE_F32 class methods --- + + Inst_VOP3__V_RNDNE_F32::Inst_VOP3__V_RNDNE_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rndne_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_RNDNE_F32 + + Inst_VOP3__V_RNDNE_F32::~Inst_VOP3__V_RNDNE_F32() + { + } // ~Inst_VOP3__V_RNDNE_F32 + + // --- description from .arch file --- + // D.f = round_nearest_even(S0.f). 
+ void + Inst_VOP3__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = roundNearestEven(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FLOOR_F32 class methods --- + + Inst_VOP3__V_FLOOR_F32::Inst_VOP3__V_FLOOR_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_floor_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_FLOOR_F32 + + Inst_VOP3__V_FLOOR_F32::~Inst_VOP3__V_FLOOR_F32() + { + } // ~Inst_VOP3__V_FLOOR_F32 + + // --- description from .arch file --- + // D.f = trunc(S0.f); + // if (S0.f < 0.0 && S0.f != D.f) then D.f += -1.0. + void + Inst_VOP3__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::floor(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_EXP_F32 class methods --- + + Inst_VOP3__V_EXP_F32::Inst_VOP3__V_EXP_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_exp_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_EXP_F32 + + Inst_VOP3__V_EXP_F32::~Inst_VOP3__V_EXP_F32() + { + } // ~Inst_VOP3__V_EXP_F32 + + // --- description from .arch file --- + // D.f = pow(2.0, S0.f). 
+ void + Inst_VOP3__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::pow(2.0, src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LOG_F32 class methods --- + + Inst_VOP3__V_LOG_F32::Inst_VOP3__V_LOG_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_log_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_LOG_F32 + + Inst_VOP3__V_LOG_F32::~Inst_VOP3__V_LOG_F32() + { + } // ~Inst_VOP3__V_LOG_F32 + + // --- description from .arch file --- + // D.f = log2(S0.f). Base 2 logarithm. + void + Inst_VOP3__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::log2(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_RCP_F32 class methods --- + + Inst_VOP3__V_RCP_F32::Inst_VOP3__V_RCP_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rcp_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_RCP_F32 + + Inst_VOP3__V_RCP_F32::~Inst_VOP3__V_RCP_F32() + { + } // ~Inst_VOP3__V_RCP_F32 + + // --- description from .arch file --- + // D.f = 1.0 / S0.f. 
Reciprocal with IEEE rules and < 1ulp error. + void + Inst_VOP3__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = 1.0 / src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_RCP_IFLAG_F32 class methods --- + + Inst_VOP3__V_RCP_IFLAG_F32::Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rcp_iflag_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_RCP_IFLAG_F32 + + Inst_VOP3__V_RCP_IFLAG_F32::~Inst_VOP3__V_RCP_IFLAG_F32() + { + } // ~Inst_VOP3__V_RCP_IFLAG_F32 + + // --- description from .arch file --- + // D.f = 1.0 / S0.f. Reciprocal intended for integer division, can raise + // --- integer DIV_BY_ZERO exception but cannot raise floating-point + // --- exceptions. + void + Inst_VOP3__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = 1.0 / src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_RSQ_F32 class methods --- + + Inst_VOP3__V_RSQ_F32::Inst_VOP3__V_RSQ_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rsq_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_RSQ_F32 + + Inst_VOP3__V_RSQ_F32::~Inst_VOP3__V_RSQ_F32() + { + } // ~Inst_VOP3__V_RSQ_F32 + + // --- description from .arch file --- + // D.f = 1.0 / sqrt(S0.f). 
Reciprocal square root with IEEE rules. + void + Inst_VOP3__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = 1.0 / std::sqrt(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_RCP_F64 class methods --- + + Inst_VOP3__V_RCP_F64::Inst_VOP3__V_RCP_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rcp_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_RCP_F64 + + Inst_VOP3__V_RCP_F64::~Inst_VOP3__V_RCP_F64() + { + } // ~Inst_VOP3__V_RCP_F64 + + // --- description from .arch file --- + // D.d = 1.0 / S0.d. + void + Inst_VOP3__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::fpclassify(src[lane]) == FP_ZERO) { + vdst[lane] = +INFINITY; + } else if (std::isnan(src[lane])) { + vdst[lane] = NAN; + } else if (std::isinf(src[lane])) { + if (std::signbit(src[lane])) { + vdst[lane] = -0.0; + } else { + vdst[lane] = 0.0; + } + } else { + vdst[lane] = 1.0 / src[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_RSQ_F64 class methods --- + + Inst_VOP3__V_RSQ_F64::Inst_VOP3__V_RSQ_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rsq_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_RSQ_F64 + + Inst_VOP3__V_RSQ_F64::~Inst_VOP3__V_RSQ_F64() + { + } // ~Inst_VOP3__V_RSQ_F64 
+ + // --- description from .arch file --- + // D.d = 1.0 / sqrt(S0.d). See V_RSQ_F32. + void + Inst_VOP3__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::fpclassify(src[lane]) == FP_ZERO) { + vdst[lane] = +INFINITY; + } else if (std::isnan(src[lane])) { + vdst[lane] = NAN; + } else if (std::isinf(src[lane]) && !std::signbit(src[lane])) { + vdst[lane] = 0.0; + } else if (std::signbit(src[lane])) { + vdst[lane] = NAN; + } else { + vdst[lane] = 1.0 / std::sqrt(src[lane]); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SQRT_F32 class methods --- + + Inst_VOP3__V_SQRT_F32::Inst_VOP3__V_SQRT_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sqrt_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_SQRT_F32 + + Inst_VOP3__V_SQRT_F32::~Inst_VOP3__V_SQRT_F32() + { + } // ~Inst_VOP3__V_SQRT_F32 + + // --- description from .arch file --- + // D.f = sqrt(S0.f). 
+ void + Inst_VOP3__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::sqrt(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SQRT_F64 class methods --- + + Inst_VOP3__V_SQRT_F64::Inst_VOP3__V_SQRT_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sqrt_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_SQRT_F64 + + Inst_VOP3__V_SQRT_F64::~Inst_VOP3__V_SQRT_F64() + { + } // ~Inst_VOP3__V_SQRT_F64 + + // --- description from .arch file --- + // D.d = sqrt(S0.d). + void + Inst_VOP3__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::sqrt(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SIN_F32 class methods --- + + Inst_VOP3__V_SIN_F32::Inst_VOP3__V_SIN_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sin_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_SIN_F32 + + Inst_VOP3__V_SIN_F32::~Inst_VOP3__V_SIN_F32() + { + } // ~Inst_VOP3__V_SIN_F32 + + // --- description from .arch file --- + // D.f = sin(S0.f * 2 * PI). + // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in + // float 0.0. 
+ void + Inst_VOP3__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + ConstScalarOperandF32 pi(gpuDynInst, REG_PI); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + pi.read(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::sin(src[lane] * 2 * pi.rawData()); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_COS_F32 class methods --- + + Inst_VOP3__V_COS_F32::Inst_VOP3__V_COS_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cos_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_COS_F32 + + Inst_VOP3__V_COS_F32::~Inst_VOP3__V_COS_F32() + { + } // ~Inst_VOP3__V_COS_F32 + + // --- description from .arch file --- + // D.f = cos(S0.f * 2 * PI). + // Valid range of S0.f is [-256.0, +256.0]. Out of range input results in + // float 1.0. + void + Inst_VOP3__V_COS_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + ConstScalarOperandF32 pi(gpuDynInst, REG_PI); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + pi.read(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::cos(src[lane] * 2 * pi.rawData()); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_NOT_B32 class methods --- + + Inst_VOP3__V_NOT_B32::Inst_VOP3__V_NOT_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_not_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_NOT_B32 + + Inst_VOP3__V_NOT_B32::~Inst_VOP3__V_NOT_B32() + { + } // ~Inst_VOP3__V_NOT_B32 + + // --- description from .arch file --- + // D.u = ~S0.u. 
+ // Input and output modifiers not supported. + void + Inst_VOP3__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = ~src[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_BFREV_B32 class methods --- + + Inst_VOP3__V_BFREV_B32::Inst_VOP3__V_BFREV_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_bfrev_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_BFREV_B32 + + Inst_VOP3__V_BFREV_B32::~Inst_VOP3__V_BFREV_B32() + { + } // ~Inst_VOP3__V_BFREV_B32 + + // --- description from .arch file --- + // D.u[31:0] = S0.u[0:31], bitfield reverse. + // Input and output modifiers not supported. + void + Inst_VOP3__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = reverseBits(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FFBH_U32 class methods --- + + Inst_VOP3__V_FFBH_U32::Inst_VOP3__V_FFBH_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ffbh_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_FFBH_U32 + + Inst_VOP3__V_FFBH_U32::~Inst_VOP3__V_FFBH_U32() + { + } // ~Inst_VOP3__V_FFBH_U32 + + // --- description from .arch file --- + // D.u = position of first 1 in S0.u from MSB; + // D.u = 0xffffffff if S0.u == 0. 
+ void + Inst_VOP3__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = findFirstOneMsb(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FFBL_B32 class methods --- + + Inst_VOP3__V_FFBL_B32::Inst_VOP3__V_FFBL_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ffbl_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_FFBL_B32 + + Inst_VOP3__V_FFBL_B32::~Inst_VOP3__V_FFBL_B32() + { + } // ~Inst_VOP3__V_FFBL_B32 + + // --- description from .arch file --- + // D.u = position of first 1 in S0.u from LSB; + // D.u = 0xffffffff if S0.u == 0. + void + Inst_VOP3__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = findFirstOne(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FFBH_I32 class methods --- + + Inst_VOP3__V_FFBH_I32::Inst_VOP3__V_FFBH_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ffbh_i32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_FFBH_I32 + + Inst_VOP3__V_FFBH_I32::~Inst_VOP3__V_FFBH_I32() + { + } // ~Inst_VOP3__V_FFBH_I32 + + // --- description from .arch file --- + // D.u = position of first bit different from sign bit in S0.i from MSB; + // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff. 
+ void + Inst_VOP3__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = firstOppositeSignBit(src[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FREXP_EXP_I32_F64 class methods --- + + Inst_VOP3__V_FREXP_EXP_I32_F64::Inst_VOP3__V_FREXP_EXP_I32_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_frexp_exp_i32_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_FREXP_EXP_I32_F64 + + Inst_VOP3__V_FREXP_EXP_I32_F64::~Inst_VOP3__V_FREXP_EXP_I32_F64() + { + } // ~Inst_VOP3__V_FREXP_EXP_I32_F64 + + // --- description from .arch file --- + // See V_FREXP_EXP_I32_F32. + void + Inst_VOP3__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isinf(src[lane]) || std::isnan(src[lane])) { + vdst[lane] = 0; + } else { + VecElemI32 exp(0); + std::frexp(src[lane], &exp); + vdst[lane] = exp; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FREXP_MANT_F64 class methods --- + + Inst_VOP3__V_FREXP_MANT_F64::Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_frexp_mant_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_FREXP_MANT_F64 + + Inst_VOP3__V_FREXP_MANT_F64::~Inst_VOP3__V_FREXP_MANT_F64() + { + } // ~Inst_VOP3__V_FREXP_MANT_F64 + + // --- 
description from .arch file --- + // See V_FREXP_MANT_F32. + void + Inst_VOP3__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemI32 exp(0); + vdst[lane] = std::frexp(src[lane], &exp); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FRACT_F64 class methods --- + + Inst_VOP3__V_FRACT_F64::Inst_VOP3__V_FRACT_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_fract_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_FRACT_F64 + + Inst_VOP3__V_FRACT_F64::~Inst_VOP3__V_FRACT_F64() + { + } // ~Inst_VOP3__V_FRACT_F64 + + // --- description from .arch file --- + // See V_FRACT_F32. + void + Inst_VOP3__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src(gpuDynInst, extData.SRC0); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemF32 int_part(0.0); + vdst[lane] = std::modf(src[lane], &int_part); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FREXP_EXP_I32_F32 class methods --- + + Inst_VOP3__V_FREXP_EXP_I32_F32::Inst_VOP3__V_FREXP_EXP_I32_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_frexp_exp_i32_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_FREXP_EXP_I32_F32 + + Inst_VOP3__V_FREXP_EXP_I32_F32::~Inst_VOP3__V_FREXP_EXP_I32_F32() + { + } // ~Inst_VOP3__V_FREXP_EXP_I32_F32 + + // --- description from .arch file --- + // if (S0.f == INF || S0.f == NAN) 
then D.i = 0; + // else D.i = TwosComplement(Exponent(S0.f) - 127 + 1). + // Returns exponent of single precision float input, such that S0.f = + // significand * (2 ** exponent). See also FREXP_MANT_F32, which returns + // the significand. + void + Inst_VOP3__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isinf(src[lane])|| std::isnan(src[lane])) { + vdst[lane] = 0; + } else { + VecElemI32 exp(0); + std::frexp(src[lane], &exp); + vdst[lane] = exp; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FREXP_MANT_F32 class methods --- + + Inst_VOP3__V_FREXP_MANT_F32::Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_frexp_mant_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_FREXP_MANT_F32 + + Inst_VOP3__V_FREXP_MANT_F32::~Inst_VOP3__V_FREXP_MANT_F32() + { + } // ~Inst_VOP3__V_FREXP_MANT_F32 + + // --- description from .arch file --- + // if (S0.f == INF || S0.f == NAN) then D.f = S0.f; + // else D.f = Mantissa(S0.f). + // Result range is in (-1.0,-0.5][0.5,1.0) in normal cases. Returns binary + // --- significand of single precision float input, such that S0.f = + // --- significand * (2 ** exponent). See also FREXP_EXP_I32_F32, which + // --- returns integer exponent. 
+ void + Inst_VOP3__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src(gpuDynInst, extData.SRC0); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + if (instData.ABS & 0x1) { + src.absModifier(); + } + + if (extData.NEG & 0x1) { + src.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isinf(src[lane]) || std::isnan(src[lane])) { + vdst[lane] = src[lane]; + } else { + VecElemI32 exp(0); + vdst[lane] = std::frexp(src[lane], &exp); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CLREXCP class methods --- + + Inst_VOP3__V_CLREXCP::Inst_VOP3__V_CLREXCP(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_clrexcp", false) + { + } // Inst_VOP3__V_CLREXCP + + Inst_VOP3__V_CLREXCP::~Inst_VOP3__V_CLREXCP() + { + } // ~Inst_VOP3__V_CLREXCP + + // --- description from .arch file --- + // Clear wave's exception state in SIMD (SP). + void + Inst_VOP3__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_F16_U16 class methods --- + + Inst_VOP3__V_CVT_F16_U16::Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f16_u16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CVT_F16_U16 + + Inst_VOP3__V_CVT_F16_U16::~Inst_VOP3__V_CVT_F16_U16() + { + } // ~Inst_VOP3__V_CVT_F16_U16 + + // --- description from .arch file --- + // D.f16 = uint16_to_flt16(S.u16). + // Supports denormals, rounding, exception flags and saturation. 
+ void + Inst_VOP3__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_F16_I16 class methods --- + + Inst_VOP3__V_CVT_F16_I16::Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_f16_i16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CVT_F16_I16 + + Inst_VOP3__V_CVT_F16_I16::~Inst_VOP3__V_CVT_F16_I16() + { + } // ~Inst_VOP3__V_CVT_F16_I16 + + // --- description from .arch file --- + // D.f16 = int16_to_flt16(S.i16). + // Supports denormals, rounding, exception flags and saturation. + void + Inst_VOP3__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_U16_F16 class methods --- + + Inst_VOP3__V_CVT_U16_F16::Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_u16_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CVT_U16_F16 + + Inst_VOP3__V_CVT_U16_F16::~Inst_VOP3__V_CVT_U16_F16() + { + } // ~Inst_VOP3__V_CVT_U16_F16 + + // --- description from .arch file --- + // D.u16 = flt16_to_uint16(S.f16). + // Supports rounding, exception flags and saturation. + void + Inst_VOP3__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_I16_F16 class methods --- + + Inst_VOP3__V_CVT_I16_F16::Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_i16_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CVT_I16_F16 + + Inst_VOP3__V_CVT_I16_F16::~Inst_VOP3__V_CVT_I16_F16() + { + } // ~Inst_VOP3__V_CVT_I16_F16 + + // --- description from .arch file --- + // D.i16 = flt16_to_int16(S.f16). + // Supports rounding, exception flags and saturation. 
+ void + Inst_VOP3__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_RCP_F16 class methods --- + + Inst_VOP3__V_RCP_F16::Inst_VOP3__V_RCP_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rcp_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_RCP_F16 + + Inst_VOP3__V_RCP_F16::~Inst_VOP3__V_RCP_F16() + { + } // ~Inst_VOP3__V_RCP_F16 + + // --- description from .arch file --- + // if (S0.f16 == 1.0f) + // D.f16 = 1.0f; + // else + // D.f16 = ApproximateRecip(S0.f16). + void + Inst_VOP3__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_SQRT_F16 class methods --- + + Inst_VOP3__V_SQRT_F16::Inst_VOP3__V_SQRT_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sqrt_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_SQRT_F16 + + Inst_VOP3__V_SQRT_F16::~Inst_VOP3__V_SQRT_F16() + { + } // ~Inst_VOP3__V_SQRT_F16 + + // --- description from .arch file --- + // if (S0.f16 == 1.0f) + // D.f16 = 1.0f; + // else + // D.f16 = ApproximateSqrt(S0.f16). + void + Inst_VOP3__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_RSQ_F16 class methods --- + + Inst_VOP3__V_RSQ_F16::Inst_VOP3__V_RSQ_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_rsq_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_RSQ_F16 + + Inst_VOP3__V_RSQ_F16::~Inst_VOP3__V_RSQ_F16() + { + } // ~Inst_VOP3__V_RSQ_F16 + + // --- description from .arch file --- + // if (S0.f16 == 1.0f) + // D.f16 = 1.0f; + // else + // D.f16 = ApproximateRecipSqrt(S0.f16). 
+ // NOTE(review): every F16 VOP3 op in this region is a stub whose
+ // execute() calls panicUnimplemented(), i.e. decoding one of these
+ // aborts the simulation. Half-precision VOP3 ALU support is TODO.
+ void
+ Inst_VOP3__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_LOG_F16 class methods ---
+
+ Inst_VOP3__V_LOG_F16::Inst_VOP3__V_LOG_F16(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_log_f16", false)
+ {
+ setFlag(ALU);
+ setFlag(F16);
+ } // Inst_VOP3__V_LOG_F16
+
+ Inst_VOP3__V_LOG_F16::~Inst_VOP3__V_LOG_F16()
+ {
+ } // ~Inst_VOP3__V_LOG_F16
+
+ // --- description from .arch file ---
+ // if (S0.f16 == 1.0f)
+ // D.f16 = 0.0f;
+ // else
+ // D.f16 = ApproximateLog2(S0.f16).
+ void
+ Inst_VOP3__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_EXP_F16 class methods ---
+
+ Inst_VOP3__V_EXP_F16::Inst_VOP3__V_EXP_F16(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_exp_f16", false)
+ {
+ setFlag(ALU);
+ setFlag(F16);
+ } // Inst_VOP3__V_EXP_F16
+
+ Inst_VOP3__V_EXP_F16::~Inst_VOP3__V_EXP_F16()
+ {
+ } // ~Inst_VOP3__V_EXP_F16
+
+ // --- description from .arch file ---
+ // if (S0.f16 == 0.0f)
+ // D.f16 = 1.0f;
+ // else
+ // D.f16 = Approximate2ToX(S0.f16).
+ void
+ Inst_VOP3__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_FREXP_MANT_F16 class methods ---
+
+ Inst_VOP3__V_FREXP_MANT_F16::Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_frexp_mant_f16", false)
+ {
+ setFlag(ALU);
+ setFlag(F16);
+ } // Inst_VOP3__V_FREXP_MANT_F16
+
+ Inst_VOP3__V_FREXP_MANT_F16::~Inst_VOP3__V_FREXP_MANT_F16()
+ {
+ } // ~Inst_VOP3__V_FREXP_MANT_F16
+
+ // --- description from .arch file ---
+ // if (S0.f16 == +-INF || S0.f16 == NAN)
+ // D.f16 = S0.f16;
+ // else
+ // D.f16 = mantissa(S0.f16).
+ // Result range is (-1.0,-0.5][0.5,1.0).
+ // C math library frexp function.
+ // Returns binary significand of half precision float input, such that the
+ // original single float = significand * (2 ** exponent).
+ void
+ Inst_VOP3__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_FREXP_EXP_I16_F16 class methods ---
+
+ Inst_VOP3__V_FREXP_EXP_I16_F16::Inst_VOP3__V_FREXP_EXP_I16_F16(
+ InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_frexp_exp_i16_f16", false)
+ {
+ setFlag(ALU);
+ setFlag(F16);
+ } // Inst_VOP3__V_FREXP_EXP_I16_F16
+
+ Inst_VOP3__V_FREXP_EXP_I16_F16::~Inst_VOP3__V_FREXP_EXP_I16_F16()
+ {
+ } // ~Inst_VOP3__V_FREXP_EXP_I16_F16
+
+ // --- description from .arch file ---
+ // if (S0.f16 == +-INF || S0.f16 == NAN)
+ // D.i16 = 0;
+ // else
+ // D.i16 = 2s_complement(exponent(S0.f16) - 15 + 1).
+ // C math library frexp function.
+ // Returns exponent of half precision float input, such that the
+ // original single float = significand * (2 ** exponent).
+ void
+ Inst_VOP3__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_FLOOR_F16 class methods ---
+
+ Inst_VOP3__V_FLOOR_F16::Inst_VOP3__V_FLOOR_F16(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_floor_f16", false)
+ {
+ setFlag(ALU);
+ setFlag(F16);
+ } // Inst_VOP3__V_FLOOR_F16
+
+ Inst_VOP3__V_FLOOR_F16::~Inst_VOP3__V_FLOOR_F16()
+ {
+ } // ~Inst_VOP3__V_FLOOR_F16
+
+ // --- description from .arch file ---
+ // D.f16 = trunc(S0.f16);
+ // if (S0.f16 < 0.0f && S0.f16 != D.f16) then D.f16 -= 1.0f.
+ void
+ Inst_VOP3__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_CEIL_F16 class methods ---
+
+ Inst_VOP3__V_CEIL_F16::Inst_VOP3__V_CEIL_F16(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_ceil_f16", false)
+ {
+ setFlag(ALU);
+ setFlag(F16);
+ } // Inst_VOP3__V_CEIL_F16
+
+ Inst_VOP3__V_CEIL_F16::~Inst_VOP3__V_CEIL_F16()
+ {
+ } // ~Inst_VOP3__V_CEIL_F16
+
+ // --- description from .arch file ---
+ // D.f16 = trunc(S0.f16);
+ // if (S0.f16 > 0.0f && S0.f16 != D.f16) then D.f16 += 1.0f.
+ void
+ Inst_VOP3__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_TRUNC_F16 class methods ---
+
+ Inst_VOP3__V_TRUNC_F16::Inst_VOP3__V_TRUNC_F16(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_trunc_f16", false)
+ {
+ setFlag(ALU);
+ setFlag(F16);
+ } // Inst_VOP3__V_TRUNC_F16
+
+ Inst_VOP3__V_TRUNC_F16::~Inst_VOP3__V_TRUNC_F16()
+ {
+ } // ~Inst_VOP3__V_TRUNC_F16
+
+ // --- description from .arch file ---
+ // D.f16 = trunc(S0.f16).
+ // Round-to-zero semantics.
+ void
+ Inst_VOP3__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_RNDNE_F16 class methods ---
+
+ Inst_VOP3__V_RNDNE_F16::Inst_VOP3__V_RNDNE_F16(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_rndne_f16", false)
+ {
+ setFlag(ALU);
+ setFlag(F16);
+ } // Inst_VOP3__V_RNDNE_F16
+
+ Inst_VOP3__V_RNDNE_F16::~Inst_VOP3__V_RNDNE_F16()
+ {
+ } // ~Inst_VOP3__V_RNDNE_F16
+
+ // --- description from .arch file ---
+ // D.f16 = FLOOR(S0.f16 + 0.5f);
+ // if (floor(S0.f16) is even && fract(S0.f16) == 0.5f) then D.f16 -= 1.0f.
+ // Round-to-nearest-even semantics.
+ void
+ Inst_VOP3__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_FRACT_F16 class methods ---
+
+ Inst_VOP3__V_FRACT_F16::Inst_VOP3__V_FRACT_F16(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_fract_f16", false)
+ {
+ setFlag(ALU);
+ setFlag(F16);
+ } // Inst_VOP3__V_FRACT_F16
+
+ Inst_VOP3__V_FRACT_F16::~Inst_VOP3__V_FRACT_F16()
+ {
+ } // ~Inst_VOP3__V_FRACT_F16
+
+ // --- description from .arch file ---
+ // D.f16 = S0.f16 + -floor(S0.f16).
+ void
+ Inst_VOP3__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_SIN_F16 class methods ---
+
+ Inst_VOP3__V_SIN_F16::Inst_VOP3__V_SIN_F16(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_sin_f16", false)
+ {
+ setFlag(ALU);
+ setFlag(F16);
+ } // Inst_VOP3__V_SIN_F16
+
+ Inst_VOP3__V_SIN_F16::~Inst_VOP3__V_SIN_F16()
+ {
+ } // ~Inst_VOP3__V_SIN_F16
+
+ // --- description from .arch file ---
+ // D.f16 = sin(S0.f16 * 2 * PI).
+ void
+ Inst_VOP3__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_COS_F16 class methods ---
+
+ Inst_VOP3__V_COS_F16::Inst_VOP3__V_COS_F16(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_cos_f16", false)
+ {
+ setFlag(ALU);
+ setFlag(F16);
+ } // Inst_VOP3__V_COS_F16
+
+ Inst_VOP3__V_COS_F16::~Inst_VOP3__V_COS_F16()
+ {
+ } // ~Inst_VOP3__V_COS_F16
+
+ // --- description from .arch file ---
+ // D.f16 = cos(S0.f16 * 2 * PI).
+ void
+ Inst_VOP3__V_COS_F16::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_EXP_LEGACY_F32 class methods ---
+
+ Inst_VOP3__V_EXP_LEGACY_F32::Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_exp_legacy_f32", false)
+ {
+ setFlag(ALU);
+ setFlag(F32);
+ } // Inst_VOP3__V_EXP_LEGACY_F32
+
+ Inst_VOP3__V_EXP_LEGACY_F32::~Inst_VOP3__V_EXP_LEGACY_F32()
+ {
+ } // ~Inst_VOP3__V_EXP_LEGACY_F32
+
+ // --- description from .arch file ---
+ // D.f = pow(2.0, S0.f) with legacy semantics.
+ void
+ Inst_VOP3__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
+ VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+ src.readSrc();
+
+ // Unary VOP3 op: only the src0 ABS/NEG modifier bits are honored.
+ if (instData.ABS & 0x1) {
+ src.absModifier();
+ }
+
+ if (extData.NEG & 0x1) {
+ src.negModifier();
+ }
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ // std::pow(2.0, x) evaluates in double precision; the
+ // result is narrowed back to f32 by the vdst assignment.
+ vdst[lane] = std::pow(2.0, src[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_LOG_LEGACY_F32 class methods ---
+
+ Inst_VOP3__V_LOG_LEGACY_F32::Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_log_legacy_f32", false)
+ {
+ setFlag(ALU);
+ setFlag(F32);
+ } // Inst_VOP3__V_LOG_LEGACY_F32
+
+ Inst_VOP3__V_LOG_LEGACY_F32::~Inst_VOP3__V_LOG_LEGACY_F32()
+ {
+ } // ~Inst_VOP3__V_LOG_LEGACY_F32
+
+ // --- description from .arch file ---
+ // D.f = log2(S0.f). Base 2 logarithm with legacy semantics.
+ void
+ Inst_VOP3__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
+ VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+ src.readSrc();
+
+ // Apply the src0 input modifiers, mirroring
+ // Inst_VOP3__V_EXP_LEGACY_F32::execute(); previously this opcode
+ // silently ignored ABS/NEG.
+ if (instData.ABS & 0x1) {
+ src.absModifier();
+ }
+
+ if (extData.NEG & 0x1) {
+ src.negModifier();
+ }
+
+ /**
+ * input modifiers are supported by FP operations only; this is a
+ * unary op, so the src1/src2 modifier bits must be clear.
+ */
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = std::log2(src[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_MAD_LEGACY_F32 class methods ---
+
+ Inst_VOP3__V_MAD_LEGACY_F32::Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_mad_legacy_f32", false)
+ {
+ setFlag(ALU);
+ setFlag(F32);
+ setFlag(MAD);
+ } // Inst_VOP3__V_MAD_LEGACY_F32
+
+ Inst_VOP3__V_MAD_LEGACY_F32::~Inst_VOP3__V_MAD_LEGACY_F32()
+ {
+ } // ~Inst_VOP3__V_MAD_LEGACY_F32
+
+ // --- description from .arch file ---
+ // D.f = S0.f * S1.f + S2.f (DX9 rules, 0.0 * x = 0.0).
+ void
+ Inst_VOP3__V_MAD_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
+ VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ if (instData.ABS & 0x1) {
+ src0.absModifier();
+ }
+
+ if (instData.ABS & 0x2) {
+ src1.absModifier();
+ }
+
+ if (instData.ABS & 0x4) {
+ src2.absModifier();
+ }
+
+ if (extData.NEG & 0x1) {
+ src0.negModifier();
+ }
+
+ if (extData.NEG & 0x2) {
+ src1.negModifier();
+ }
+
+ if (extData.NEG & 0x4) {
+ src2.negModifier();
+ }
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ // NOTE(review): the .arch description asks for DX9 legacy
+ // multiply semantics (0.0 * x = 0.0 even for x = inf/NaN),
+ // but std::fma follows IEEE-754 and yields NaN for
+ // 0 * inf. Deliberate simplification? TODO confirm before
+ // relying on legacy-shader corner cases.
+ vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_MAD_F32 class methods ---
+
+ Inst_VOP3__V_MAD_F32::Inst_VOP3__V_MAD_F32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_mad_f32", false)
+ {
+ setFlag(ALU);
+ setFlag(F32);
+ setFlag(MAD);
+ }
// Inst_VOP3__V_MAD_F32
+
+ Inst_VOP3__V_MAD_F32::~Inst_VOP3__V_MAD_F32()
+ {
+ } // ~Inst_VOP3__V_MAD_F32
+
+ // --- description from .arch file ---
+ // D.f = S0.f * S1.f + S2.f.
+ void
+ Inst_VOP3__V_MAD_F32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
+ VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ // FP op: honor the per-source ABS/NEG input modifier bits.
+ if (instData.ABS & 0x1) {
+ src0.absModifier();
+ }
+
+ if (instData.ABS & 0x2) {
+ src1.absModifier();
+ }
+
+ if (instData.ABS & 0x4) {
+ src2.absModifier();
+ }
+
+ if (extData.NEG & 0x1) {
+ src0.negModifier();
+ }
+
+ if (extData.NEG & 0x2) {
+ src1.negModifier();
+ }
+
+ if (extData.NEG & 0x4) {
+ src2.negModifier();
+ }
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_MAD_I32_I24 class methods ---
+
+ Inst_VOP3__V_MAD_I32_I24::Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_mad_i32_i24", false)
+ {
+ setFlag(ALU);
+ setFlag(MAD);
+ } // Inst_VOP3__V_MAD_I32_I24
+
+ Inst_VOP3__V_MAD_I32_I24::~Inst_VOP3__V_MAD_I32_I24()
+ {
+ } // ~Inst_VOP3__V_MAD_I32_I24
+
+ // --- description from .arch file ---
+ // D.i = S0.i[23:0] * S1.i[23:0] + S2.i.
+ void
+ Inst_VOP3__V_MAD_I32_I24::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
+ VecOperandI32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ // sext<24> sign-extends the low 24 bits of each
+ // multiplicand before the multiply; the 32-bit
+ // destination keeps the low bits of the result.
+ vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
+ * sext<24>(bits(src1[lane], 23, 0)) + src2[lane];
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_MAD_U32_U24 class methods ---
+
+ Inst_VOP3__V_MAD_U32_U24::Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_mad_u32_u24", false)
+ {
+ setFlag(ALU);
+ setFlag(MAD);
+ } // Inst_VOP3__V_MAD_U32_U24
+
+ Inst_VOP3__V_MAD_U32_U24::~Inst_VOP3__V_MAD_U32_U24()
+ {
+ } // ~Inst_VOP3__V_MAD_U32_U24
+
+ // --- description from .arch file ---
+ // D.u = S0.u[23:0] * S1.u[23:0] + S2.u.
+ void
+ Inst_VOP3__V_MAD_U32_U24::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
+ VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0)
+ + src2[lane];
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_CUBEID_F32 class methods ---
+
+ // NOTE(review): the four V_CUBE* graphics helpers below are stubs
+ // that panic at runtime if decoded.
+ Inst_VOP3__V_CUBEID_F32::Inst_VOP3__V_CUBEID_F32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_cubeid_f32", false)
+ {
+ setFlag(ALU);
+ setFlag(F32);
+ } // Inst_VOP3__V_CUBEID_F32
+
+ Inst_VOP3__V_CUBEID_F32::~Inst_VOP3__V_CUBEID_F32()
+ {
+ } // ~Inst_VOP3__V_CUBEID_F32
+
+ // --- description from .arch file ---
+ // D.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). XYZ coordinate is given in
+ // --- (S0.f, S1.f, S2.f).
+ void
+ Inst_VOP3__V_CUBEID_F32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_CUBESC_F32 class methods ---
+
+ Inst_VOP3__V_CUBESC_F32::Inst_VOP3__V_CUBESC_F32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_cubesc_f32", false)
+ {
+ setFlag(ALU);
+ setFlag(F32);
+ } // Inst_VOP3__V_CUBESC_F32
+
+ Inst_VOP3__V_CUBESC_F32::~Inst_VOP3__V_CUBESC_F32()
+ {
+ } // ~Inst_VOP3__V_CUBESC_F32
+
+ // --- description from .arch file ---
+ // D.f = cubemap S coordinate. XYZ coordinate is given in (S0.f, S1.f,
+ // S2.f).
+ void
+ Inst_VOP3__V_CUBESC_F32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_CUBETC_F32 class methods ---
+
+ Inst_VOP3__V_CUBETC_F32::Inst_VOP3__V_CUBETC_F32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_cubetc_f32", false)
+ {
+ setFlag(ALU);
+ setFlag(F32);
+ } // Inst_VOP3__V_CUBETC_F32
+
+ Inst_VOP3__V_CUBETC_F32::~Inst_VOP3__V_CUBETC_F32()
+ {
+ } // ~Inst_VOP3__V_CUBETC_F32
+
+ // --- description from .arch file ---
+ // D.f = cubemap T coordinate. XYZ coordinate is given in (S0.f, S1.f,
+ // S2.f).
+ void
+ Inst_VOP3__V_CUBETC_F32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_CUBEMA_F32 class methods ---
+
+ Inst_VOP3__V_CUBEMA_F32::Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_cubema_f32", false)
+ {
+ setFlag(ALU);
+ setFlag(F32);
+ } // Inst_VOP3__V_CUBEMA_F32
+
+ Inst_VOP3__V_CUBEMA_F32::~Inst_VOP3__V_CUBEMA_F32()
+ {
+ } // ~Inst_VOP3__V_CUBEMA_F32
+
+ // --- description from .arch file ---
+ // D.f = 2.0 * cubemap major axis. XYZ coordinate is given in (S0.f, S1.f,
+ // --- S2.f).
+ void
+ Inst_VOP3__V_CUBEMA_F32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ panicUnimplemented();
+ } // execute
+ // --- Inst_VOP3__V_BFE_U32 class methods ---
+
+ Inst_VOP3__V_BFE_U32::Inst_VOP3__V_BFE_U32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_bfe_u32", false)
+ {
+ setFlag(ALU);
+ } // Inst_VOP3__V_BFE_U32
+
+ Inst_VOP3__V_BFE_U32::~Inst_VOP3__V_BFE_U32()
+ {
+ } // ~Inst_VOP3__V_BFE_U32
+
+ // --- description from .arch file ---
+ // D.u = (S0.u>>S1.u[4:0]) & ((1<<S2.u[4:0])-1); bitfield extract.
+ // S0 = data, S1 = field offset, S2 = field width.
+ void
+ Inst_VOP3__V_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
+ VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ // Build the mask with a 64-bit literal: a field width of
+ // 31 would otherwise shift a signed int into its sign bit.
+ vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
+ & ((1ULL << bits(src2[lane], 4, 0)) - 1);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_BFE_I32 class methods ---
+
+ Inst_VOP3__V_BFE_I32::Inst_VOP3__V_BFE_I32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_bfe_i32", false)
+ {
+ setFlag(ALU);
+ } // Inst_VOP3__V_BFE_I32
+
+ Inst_VOP3__V_BFE_I32::~Inst_VOP3__V_BFE_I32()
+ {
+ } // ~Inst_VOP3__V_BFE_I32
+
+ // --- description from .arch file ---
+ // D.i = (S0.i>>S1.u[4:0]) & ((1<<S2.u[4:0])-1); bitfield extract,
+ // then sign-extend the extracted field.
+ // S0 = data, S1 = field offset, S2 = field width.
+ void
+ Inst_VOP3__V_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
+ VecOperandI32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ int width = bits(src2[lane], 4, 0);
+
+ // A zero-width field extracts nothing. This also guards
+ // the sign-extension test below, which would otherwise
+ // shift by -1 (undefined behavior) when width == 0.
+ if (width == 0) {
+ vdst[lane] = 0;
+ continue;
+ }
+
+ vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
+ & ((1ULL << width) - 1);
+
+ // Above extracted a signed int of size width bits which
+ // needs to be sign-extended. Check if the MSB of our
+ // width-bit integer is 1, and sign extend if it is.
+ if (vdst[lane] >> (width - 1)) {
+ vdst[lane] |= 0xffffffff << width;
+ }
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_BFI_B32 class methods ---
+
+ Inst_VOP3__V_BFI_B32::Inst_VOP3__V_BFI_B32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_bfi_b32", false)
+ {
+ setFlag(ALU);
+ } // Inst_VOP3__V_BFI_B32
+
+ Inst_VOP3__V_BFI_B32::~Inst_VOP3__V_BFI_B32()
+ {
+ } // ~Inst_VOP3__V_BFI_B32
+
+ // --- description from .arch file ---
+ // D.u = (S0.u & S1.u) | (~S0.u & S2.u); bitfield insert.
+ void
+ Inst_VOP3__V_BFI_B32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
+ VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ // src0 selects bit-for-bit between src1 (where the src0
+ // bit is 1) and src2 (where it is 0).
+ vdst[lane] = (src0[lane] & src1[lane]) | (~src0[lane]
+ & src2[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_FMA_F32 class methods ---
+
+ Inst_VOP3__V_FMA_F32::Inst_VOP3__V_FMA_F32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_fma_f32", false)
+ {
+ setFlag(ALU);
+ setFlag(F32);
+ setFlag(FMA);
+ } // Inst_VOP3__V_FMA_F32
+
+ Inst_VOP3__V_FMA_F32::~Inst_VOP3__V_FMA_F32()
+ {
+ } // ~Inst_VOP3__V_FMA_F32
+
+ // --- description from .arch file ---
+ // D.f = S0.f * S1.f + S2.f.
+ void
+ Inst_VOP3__V_FMA_F32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
+ VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ // FP op: honor the per-source ABS/NEG input modifier bits.
+ if (instData.ABS & 0x1) {
+ src0.absModifier();
+ }
+
+ if (instData.ABS & 0x2) {
+ src1.absModifier();
+ }
+
+ if (instData.ABS & 0x4) {
+ src2.absModifier();
+ }
+
+ if (extData.NEG & 0x1) {
+ src0.negModifier();
+ }
+
+ if (extData.NEG & 0x2) {
+ src1.negModifier();
+ }
+
+ if (extData.NEG & 0x4) {
+ src2.negModifier();
+ }
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_FMA_F64 class methods ---
+
+ Inst_VOP3__V_FMA_F64::Inst_VOP3__V_FMA_F64(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_fma_f64", false)
+ {
+ setFlag(ALU);
+ setFlag(F64);
+ setFlag(FMA);
+ } // Inst_VOP3__V_FMA_F64
+
+ Inst_VOP3__V_FMA_F64::~Inst_VOP3__V_FMA_F64()
+ {
+ } // ~Inst_VOP3__V_FMA_F64
+
+ // --- description from .arch file ---
+ // D.d = S0.d * S1.d + S2.d.
+ void
+ Inst_VOP3__V_FMA_F64::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
+ VecOperandF64 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ if (instData.ABS & 0x1) {
+ src0.absModifier();
+ }
+
+ if (instData.ABS & 0x2) {
+ src1.absModifier();
+ }
+
+ if (instData.ABS & 0x4) {
+ src2.absModifier();
+ }
+
+ if (extData.NEG & 0x1) {
+ src0.negModifier();
+ }
+
+ if (extData.NEG & 0x2) {
+ src1.negModifier();
+ }
+
+ if (extData.NEG & 0x4) {
+ src2.negModifier();
+ }
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_LERP_U8 class methods ---
+
+ Inst_VOP3__V_LERP_U8::Inst_VOP3__V_LERP_U8(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_lerp_u8", false)
+ {
+ setFlag(ALU);
+ } // Inst_VOP3__V_LERP_U8
+
+ Inst_VOP3__V_LERP_U8::~Inst_VOP3__V_LERP_U8()
+ {
+ } // ~Inst_VOP3__V_LERP_U8
+
+ // --- description from .arch file ---
+ // D.u = ((S0.u[31:24] + S1.u[31:24] + S2.u[24]) >> 1) << 24
+ // D.u += ((S0.u[23:16] + S1.u[23:16] + S2.u[16]) >> 1) << 16;
+ // D.u += ((S0.u[15:8] + S1.u[15:8] + S2.u[8]) >> 1) << 8;
+ // D.u += ((S0.u[7:0] + S1.u[7:0] + S2.u[0]) >> 1).
+ // Unsigned 8-bit pixel average on packed unsigned bytes (linear
+ // --- interpolation). S2 acts as a round mode; if set, 0.5 rounds up,
+ // --- otherwise 0.5 truncates.
+ void
+ Inst_VOP3__V_LERP_U8::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
+ VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ // Per-byte average of src0 and src1; bit 8*k of src2
+ // supplies the rounding carry for byte k.
+ vdst[lane] = ((bits(src0[lane], 31, 24)
+ + bits(src1[lane], 31, 24) + bits(src2[lane], 24)) >> 1)
+ << 24;
+ vdst[lane] += ((bits(src0[lane], 23, 16)
+ + bits(src1[lane], 23, 16) + bits(src2[lane], 16)) >> 1)
+ << 16;
+ vdst[lane] += ((bits(src0[lane], 15, 8)
+ + bits(src1[lane], 15, 8) + bits(src2[lane], 8)) >> 1)
+ << 8;
+ vdst[lane] += ((bits(src0[lane], 7, 0) + bits(src1[lane], 7, 0)
+ + bits(src2[lane], 0)) >> 1);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_ALIGNBIT_B32 class methods ---
+
+ Inst_VOP3__V_ALIGNBIT_B32::Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_alignbit_b32", false)
+ {
+ setFlag(ALU);
+ } // Inst_VOP3__V_ALIGNBIT_B32
+
+ Inst_VOP3__V_ALIGNBIT_B32::~Inst_VOP3__V_ALIGNBIT_B32()
+ {
+ } // ~Inst_VOP3__V_ALIGNBIT_B32
+
+ // --- description from .arch file ---
+ // D.u = ({S0,S1} >> S2.u[4:0]) & 0xffffffff.
+ void
+ Inst_VOP3__V_ALIGNBIT_B32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
+ VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ // src0 forms the upper and src1 the lower 32 bits of the
+ // 64-bit quantity being shifted; shift is at most 31.
+ VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
+ | (VecElemU64)src1[lane]);
+ vdst[lane] = (VecElemU32)((src_0_1
+ >> (VecElemU64)bits(src2[lane], 4, 0)) & 0xffffffff);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_ALIGNBYTE_B32 class methods ---
+
+ Inst_VOP3__V_ALIGNBYTE_B32::Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_alignbyte_b32", false)
+ {
+ setFlag(ALU);
+ } // Inst_VOP3__V_ALIGNBYTE_B32
+
+ Inst_VOP3__V_ALIGNBYTE_B32::~Inst_VOP3__V_ALIGNBYTE_B32()
+ {
+ } // ~Inst_VOP3__V_ALIGNBYTE_B32
+
+ // --- description from .arch file ---
+ // D.u = ({S0,S1} >> (8*S2.u[4:0])) & 0xffffffff.
+ void
+ Inst_VOP3__V_ALIGNBYTE_B32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
+ VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ // src0 forms the upper and src1 the lower 32 bits of the
+ // 64-bit quantity being shifted.
+ VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
+ | (VecElemU64)src1[lane]);
+ VecElemU64 shift = 8ULL * (VecElemU64)bits(src2[lane], 4, 0);
+ // S2[4:0] can be up to 31, i.e. a byte shift of up to 248
+ // bits. Shifting a 64-bit value by >= 64 is undefined
+ // behavior in C++, so clamp: such a shift drains every
+ // bit and the result is 0.
+ vdst[lane] = (shift < 64) ?
+ (VecElemU32)((src_0_1 >> shift) & 0xffffffff) : 0;
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_MIN3_F32 class methods ---
+
+ Inst_VOP3__V_MIN3_F32::Inst_VOP3__V_MIN3_F32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_min3_f32", false)
+ {
+ setFlag(ALU);
+ setFlag(F32);
+ } // Inst_VOP3__V_MIN3_F32
+
+ Inst_VOP3__V_MIN3_F32::~Inst_VOP3__V_MIN3_F32()
+ {
+ } // ~Inst_VOP3__V_MIN3_F32
+
+ // --- description from .arch file ---
+ // D.f = min(S0.f, S1.f, S2.f).
+ void
+ Inst_VOP3__V_MIN3_F32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
+ VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ // FP op: honor the per-source ABS/NEG input modifier bits.
+ if (instData.ABS & 0x1) {
+ src0.absModifier();
+ }
+
+ if (instData.ABS & 0x2) {
+ src1.absModifier();
+ }
+
+ if (instData.ABS & 0x4) {
+ src2.absModifier();
+ }
+
+ if (extData.NEG & 0x1) {
+ src0.negModifier();
+ }
+
+ if (extData.NEG & 0x2) {
+ src1.negModifier();
+ }
+
+ if (extData.NEG & 0x4) {
+ src2.negModifier();
+ }
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ // Three-way minimum via two pairwise std::fmin calls.
+ VecElemF32 min_0_1 = std::fmin(src0[lane], src1[lane]);
+ vdst[lane] = std::fmin(min_0_1, src2[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_MIN3_I32 class methods ---
+
+ Inst_VOP3__V_MIN3_I32::Inst_VOP3__V_MIN3_I32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_min3_i32", false)
+ {
+ setFlag(ALU);
+ } // Inst_VOP3__V_MIN3_I32
+
+ Inst_VOP3__V_MIN3_I32::~Inst_VOP3__V_MIN3_I32()
+ {
+ } // ~Inst_VOP3__V_MIN3_I32
+
+ // --- description from .arch file ---
+ // D.i = min(S0.i, S1.i, S2.i).
+ void
+ Inst_VOP3__V_MIN3_I32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
+ VecOperandI32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ VecElemI32 min_0_1 = std::min(src0[lane], src1[lane]);
+ vdst[lane] = std::min(min_0_1, src2[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_MIN3_U32 class methods ---
+
+ Inst_VOP3__V_MIN3_U32::Inst_VOP3__V_MIN3_U32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_min3_u32", false)
+ {
+ setFlag(ALU);
+ } // Inst_VOP3__V_MIN3_U32
+
+ Inst_VOP3__V_MIN3_U32::~Inst_VOP3__V_MIN3_U32()
+ {
+ } // ~Inst_VOP3__V_MIN3_U32
+
+ // --- description from .arch file ---
+ // D.u = min(S0.u, S1.u, S2.u).
+ void
+ Inst_VOP3__V_MIN3_U32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
+ VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ VecElemU32 min_0_1 = std::min(src0[lane], src1[lane]);
+ vdst[lane] = std::min(min_0_1, src2[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_MAX3_F32 class methods ---
+
+ Inst_VOP3__V_MAX3_F32::Inst_VOP3__V_MAX3_F32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_max3_f32", false)
+ {
+ setFlag(ALU);
+ setFlag(F32);
+ } // Inst_VOP3__V_MAX3_F32
+
+ Inst_VOP3__V_MAX3_F32::~Inst_VOP3__V_MAX3_F32()
+ {
+ } // ~Inst_VOP3__V_MAX3_F32
+
+ // --- description from .arch file ---
+ // D.f = max(S0.f, S1.f, S2.f).
+ void
+ Inst_VOP3__V_MAX3_F32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
+ VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ // FP op: honor the per-source ABS/NEG input modifier bits.
+ if (instData.ABS & 0x1) {
+ src0.absModifier();
+ }
+
+ if (instData.ABS & 0x2) {
+ src1.absModifier();
+ }
+
+ if (instData.ABS & 0x4) {
+ src2.absModifier();
+ }
+
+ if (extData.NEG & 0x1) {
+ src0.negModifier();
+ }
+
+ if (extData.NEG & 0x2) {
+ src1.negModifier();
+ }
+
+ if (extData.NEG & 0x4) {
+ src2.negModifier();
+ }
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ // Three-way maximum via two pairwise std::fmax calls.
+ VecElemF32 max_0_1 = std::fmax(src0[lane], src1[lane]);
+ vdst[lane] = std::fmax(max_0_1, src2[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_MAX3_I32 class methods ---
+
+ Inst_VOP3__V_MAX3_I32::Inst_VOP3__V_MAX3_I32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_max3_i32", false)
+ {
+ setFlag(ALU);
+ } // Inst_VOP3__V_MAX3_I32
+
+ Inst_VOP3__V_MAX3_I32::~Inst_VOP3__V_MAX3_I32()
+ {
+ } // ~Inst_VOP3__V_MAX3_I32
+
+ // --- description from .arch file ---
+ // D.i = max(S0.i, S1.i, S2.i).
+ void
+ Inst_VOP3__V_MAX3_I32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
+ VecOperandI32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ VecElemI32 max_0_1 = std::max(src0[lane], src1[lane]);
+ vdst[lane] = std::max(max_0_1, src2[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_MAX3_U32 class methods ---
+
+ Inst_VOP3__V_MAX3_U32::Inst_VOP3__V_MAX3_U32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_max3_u32", false)
+ {
+ setFlag(ALU);
+ } // Inst_VOP3__V_MAX3_U32
+
+ Inst_VOP3__V_MAX3_U32::~Inst_VOP3__V_MAX3_U32()
+ {
+ } // ~Inst_VOP3__V_MAX3_U32
+
+ // --- description from .arch file ---
+ // D.u = max(S0.u, S1.u, S2.u).
+ void
+ Inst_VOP3__V_MAX3_U32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
+ VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ VecElemU32 max_0_1 = std::max(src0[lane], src1[lane]);
+ vdst[lane] = std::max(max_0_1, src2[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_MED3_F32 class methods ---
+
+ Inst_VOP3__V_MED3_F32::Inst_VOP3__V_MED3_F32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_med3_f32", false)
+ {
+ setFlag(ALU);
+ setFlag(F32);
+ } // Inst_VOP3__V_MED3_F32
+
+ Inst_VOP3__V_MED3_F32::~Inst_VOP3__V_MED3_F32()
+ {
+ } // ~Inst_VOP3__V_MED3_F32
+
+ // --- description from .arch file ---
+ // D.f = median(S0.f, S1.f, S2.f).
+ void
+ Inst_VOP3__V_MED3_F32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
+ VecOperandF32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ // FP op: honor the per-source ABS/NEG input modifier bits.
+ if (instData.ABS & 0x1) {
+ src0.absModifier();
+ }
+
+ if (instData.ABS & 0x2) {
+ src1.absModifier();
+ }
+
+ if (instData.ABS & 0x4) {
+ src2.absModifier();
+ }
+
+ if (extData.NEG & 0x1) {
+ src0.negModifier();
+ }
+
+ if (extData.NEG & 0x2) {
+ src1.negModifier();
+ }
+
+ if (extData.NEG & 0x4) {
+ src2.negModifier();
+ }
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ // The median() helper picks the middle of the three.
+ vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_MED3_I32 class methods ---
+
+ Inst_VOP3__V_MED3_I32::Inst_VOP3__V_MED3_I32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_med3_i32", false)
+ {
+ setFlag(ALU);
+ } // Inst_VOP3__V_MED3_I32
+
+ Inst_VOP3__V_MED3_I32::~Inst_VOP3__V_MED3_I32()
+ {
+ } // ~Inst_VOP3__V_MED3_I32
+
+ // --- description from .arch file ---
+ // D.i = median(S0.i, S1.i, S2.i).
+ void
+ Inst_VOP3__V_MED3_I32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
+ VecOperandI32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_MED3_U32 class methods ---
+
+ Inst_VOP3__V_MED3_U32::Inst_VOP3__V_MED3_U32(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_med3_u32", false)
+ {
+ setFlag(ALU);
+ } // Inst_VOP3__V_MED3_U32
+
+ Inst_VOP3__V_MED3_U32::~Inst_VOP3__V_MED3_U32()
+ {
+ } // ~Inst_VOP3__V_MED3_U32
+
+ // --- description from .arch file ---
+ // D.u = median(S0.u, S1.u, S2.u).
+ void
+ Inst_VOP3__V_MED3_U32::execute(GPUDynInstPtr gpuDynInst)
+ {
+ Wavefront *wf = gpuDynInst->wavefront();
+ ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
+ ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
+ ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
+ VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+ src0.readSrc();
+ src1.readSrc();
+ src2.readSrc();
+
+ /**
+ * input modifiers are supported by FP operations only
+ */
+ assert(!(instData.ABS & 0x1));
+ assert(!(instData.ABS & 0x2));
+ assert(!(instData.ABS & 0x4));
+ assert(!(extData.NEG & 0x1));
+ assert(!(extData.NEG & 0x2));
+ assert(!(extData.NEG & 0x4));
+
+ for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+ if (wf->execMask(lane)) {
+ vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
+ }
+ }
+
+ vdst.write();
+ } // execute
+ // --- Inst_VOP3__V_SAD_U8 class methods ---
+
+ Inst_VOP3__V_SAD_U8::Inst_VOP3__V_SAD_U8(InFmt_VOP3A *iFmt)
+ : Inst_VOP3A(iFmt, "v_sad_u8", false)
+ {
+ setFlag(ALU);
+ } // Inst_VOP3__V_SAD_U8
+
+ Inst_VOP3__V_SAD_U8::~Inst_VOP3__V_SAD_U8()
+ {
+ } // ~Inst_VOP3__V_SAD_U8
+
+ // --- description from .arch file ---
+ // D.u = abs(S0.i[31:24] - S1.i[31:24]) + abs(S0.i[23:16] - S1.i[23:16]) +
+ // abs(S0.i[15:8] - S1.i[15:8]) + abs(S0.i[7:0] - S1.i[7:0]) + S2.u.
+ // Sum of absolute differences with accumulation, overflow into upper bits
+ // is allowed.
// Byte-wise sum of absolute differences with accumulation:
// D.u = sum over the four bytes b of abs(S0.byte[b] - S1.byte[b]) + S2.u.
void
Inst_VOP3__V_SAD_U8::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    // Signed operands so the per-byte differences below can go negative
    // before std::abs() is applied.
    ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();
    src2.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // bits(x, hi, lo) extracts the byte; the subtraction happens in
            // int, so std::abs gives the true byte-wise distance. Overflow
            // of the sum into the upper result bits is allowed per spec.
            vdst[lane] = std::abs(bits(src0[lane], 31, 24)
                - bits(src1[lane], 31, 24))
                + std::abs(bits(src0[lane], 23, 16)
                - bits(src1[lane], 23, 16))
                + std::abs(bits(src0[lane], 15, 8)
                - bits(src1[lane], 15, 8))
                + std::abs(bits(src0[lane], 7, 0)
                - bits(src1[lane], 7, 0)) + src2[lane];
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP3__V_SAD_HI_U8 class methods ---

Inst_VOP3__V_SAD_HI_U8::Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3A *iFmt)
    : Inst_VOP3A(iFmt, "v_sad_hi_u8", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_SAD_HI_U8

Inst_VOP3__V_SAD_HI_U8::~Inst_VOP3__V_SAD_HI_U8()
{
} // ~Inst_VOP3__V_SAD_HI_U8

// --- description from .arch file ---
// D.u = (SAD_U8(S0, S1, 0) << 16) + S2.u.
// Sum of absolute differences with accumulation, overflow is lost.
+ void + Inst_VOP3__V_SAD_HI_U8::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (((bits(src0[lane], 31, 24) + - bits(src1[lane], 31, 24)) + (bits(src0[lane], 23, 16) + - bits(src1[lane], 23, 16)) + (bits(src0[lane], 15, 8) + - bits(src1[lane], 15, 8)) + (bits(src0[lane], 7, 0) + - bits(src1[lane], 7, 0))) << 16) + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_SAD_U16 class methods --- + + Inst_VOP3__V_SAD_U16::Inst_VOP3__V_SAD_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_sad_u16", false) + { + setFlag(ALU); + } // Inst_VOP3__V_SAD_U16 + + Inst_VOP3__V_SAD_U16::~Inst_VOP3__V_SAD_U16() + { + } // ~Inst_VOP3__V_SAD_U16 + + // --- description from .arch file --- + // D.u = abs(S0.i[31:16] - S1.i[31:16]) + abs(S0.i[15:0] - S1.i[15:0]) + // + S2.u. + // Word SAD with accumulation. 
// Halfword sum of absolute differences with accumulation:
// D.u = abs(S0[31:16] - S1[31:16]) + abs(S0[15:0] - S1[15:0]) + S2.u.
void
Inst_VOP3__V_SAD_U16::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    // Signed operands so the halfword differences can go negative before
    // std::abs() is applied (same approach as V_SAD_U8).
    ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
    ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();
    src2.readSrc();

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x1));
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            vdst[lane] = std::abs(bits(src0[lane], 31, 16)
                - bits(src1[lane], 31, 16))
                + std::abs(bits(src0[lane], 15, 0)
                - bits(src1[lane], 15, 0)) + src2[lane];
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP3__V_SAD_U32 class methods ---

Inst_VOP3__V_SAD_U32::Inst_VOP3__V_SAD_U32(InFmt_VOP3A *iFmt)
    : Inst_VOP3A(iFmt, "v_sad_u32", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_SAD_U32

Inst_VOP3__V_SAD_U32::~Inst_VOP3__V_SAD_U32()
{
} // ~Inst_VOP3__V_SAD_U32

// --- description from .arch file ---
// D.u = abs(S0.i - S1.i) + S2.u.
// Dword SAD with accumulation.
+ void + Inst_VOP3__V_SAD_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::abs(src0[lane] - src1[lane]) + src2[lane]; + } // if + } // for + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_PK_U8_F32 class methods --- + + Inst_VOP3__V_CVT_PK_U8_F32::Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_pk_u8_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_PK_U8_F32 + + Inst_VOP3__V_CVT_PK_U8_F32::~Inst_VOP3__V_CVT_PK_U8_F32() + { + } // ~Inst_VOP3__V_CVT_PK_U8_F32 + + // --- description from .arch file --- + // D.u = ((flt32_to_uint8(S0.f) & 0xff) << (8 * S1.u[1:0])) + // | (S2.u & ~(0xff << (8 * S1.u[1:0]))). + // Convert floating point value S0 to 8-bit unsigned integer and pack the + // result into byte S1 of dword S2. 
// Convert S0.f to an 8-bit unsigned integer and insert it into byte
// S1.u[1:0] of S2.u, leaving the other three bytes of S2 untouched.
void
Inst_VOP3__V_CVT_PK_U8_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
    VecOperandU32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();
    src2.readSrc();

    // Only src0 is floating point, so only its ABS/NEG modifier bits are
    // honored; the bits for src1/src2 are asserted clear below.
    if (instData.ABS & 0x1) {
        src0.absModifier();
    }


    if (extData.NEG & 0x1) {
        src0.negModifier();
    }

    /**
     * input modifiers are supported by FP operations only
     */
    assert(!(instData.ABS & 0x2));
    assert(!(instData.ABS & 0x4));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // NOTE(review): the plain (VecElemU8) cast truncates toward
            // zero and is undefined for NaN or out-of-range inputs; the
            // spec's flt32_to_uint8 presumably clamps -- confirm against
            // hardware behavior.
            vdst[lane] = (((VecElemU8)src0[lane] & 0xff)
                << (8 * bits(src1[lane], 1, 0)))
                | (src2[lane] & ~(0xff << (8 * bits(src1[lane], 1, 0))));
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP3__V_DIV_FIXUP_F32 class methods ---

Inst_VOP3__V_DIV_FIXUP_F32::Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3A *iFmt)
    : Inst_VOP3A(iFmt, "v_div_fixup_f32", false)
{
    setFlag(ALU);
    setFlag(F32);
} // Inst_VOP3__V_DIV_FIXUP_F32

Inst_VOP3__V_DIV_FIXUP_F32::~Inst_VOP3__V_DIV_FIXUP_F32()
{
} // ~Inst_VOP3__V_DIV_FIXUP_F32

// --- description from .arch file ---
// D.f = Divide fixup and flags -- s0.f = Quotient, s1.f = Denominator,
// s2.f = Numerator. This opcode generates exceptions resulting from the
// division operation.
// Divide fixup (F32): patch up the special cases of src2 / src1.
// s0.f = quotient estimate, s1.f = denominator, s2.f = numerator.
void
Inst_VOP3__V_DIV_FIXUP_F32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
    ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
    VecOperandF32 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();
    src2.readSrc();

    // FP input modifiers: abs is applied before neg for each source.
    if (instData.ABS & 0x1) {
        src0.absModifier();
    }

    if (instData.ABS & 0x2) {
        src1.absModifier();
    }

    if (instData.ABS & 0x4) {
        src2.absModifier();
    }

    if (extData.NEG & 0x1) {
        src0.negModifier();
    }

    if (extData.NEG & 0x2) {
        src1.negModifier();
    }

    if (extData.NEG & 0x4) {
        src2.negModifier();
    }

    // NOTE(review): this implementation ignores the quotient estimate in
    // src0 and recomputes src2 / src1 directly, handling the denominator
    // special cases (signed zero, NaN, infinity) explicitly -- confirm
    // this is the intended approximation of the hardware fixup.
    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            if (std::fpclassify(src1[lane]) == FP_ZERO) {
                // x / +-0 -> +-inf, sign taken from the zero denominator.
                if (std::signbit(src1[lane])) {
                    vdst[lane] = -INFINITY;
                } else {
                    vdst[lane] = +INFINITY;
                }
            } else if (std::isnan(src2[lane]) || std::isnan(src1[lane])) {
                vdst[lane] = NAN;
            } else if (std::isinf(src1[lane])) {
                // NOTE(review): x / +-inf producing +-inf here looks
                // suspect (IEEE division would give +-0) -- verify
                // against the hardware div_fixup definition.
                if (std::signbit(src1[lane])) {
                    vdst[lane] = -INFINITY;
                } else {
                    vdst[lane] = +INFINITY;
                }
            } else {
                vdst[lane] = src2[lane] / src1[lane];
            }
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP3__V_DIV_FIXUP_F64 class methods ---

Inst_VOP3__V_DIV_FIXUP_F64::Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3A *iFmt)
    : Inst_VOP3A(iFmt, "v_div_fixup_f64", false)
{
    setFlag(ALU);
    setFlag(F64);
} // Inst_VOP3__V_DIV_FIXUP_F64

Inst_VOP3__V_DIV_FIXUP_F64::~Inst_VOP3__V_DIV_FIXUP_F64()
{
} // ~Inst_VOP3__V_DIV_FIXUP_F64

// --- description from .arch file ---
// D.d = Divide fixup and flags -- s0.d = Quotient, s1.d = Denominator,
// s2.d = Numerator. This opcode generates exceptions resulting from the
// division operation.
+ void + Inst_VOP3__V_DIV_FIXUP_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (instData.ABS & 0x4) { + src2.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + if (extData.NEG & 0x4) { + src2.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int sign_out = std::signbit(src1[lane]) + ^ std::signbit(src2[lane]); + int exp1(0); + int exp2(0); + std::frexp(src1[lane], &exp1); + std::frexp(src2[lane], &exp2); + + if (std::isnan(src1[lane]) || std::isnan(src2[lane])) { + vdst[lane] = std::numeric_limits::quiet_NaN(); + } else if (std::fpclassify(src1[lane]) == FP_ZERO + && std::fpclassify(src2[lane]) == FP_ZERO) { + vdst[lane] + = std::numeric_limits::signaling_NaN(); + } else if (std::isinf(src1[lane]) && std::isinf(src2[lane])) { + vdst[lane] + = std::numeric_limits::signaling_NaN(); + } else if (std::fpclassify(src1[lane]) == FP_ZERO + || std::isinf(src2[lane])) { + vdst[lane] = sign_out ? -INFINITY : +INFINITY; + } else if (std::isinf(src1[lane]) + || std::fpclassify(src2[lane]) == FP_ZERO) { + vdst[lane] = sign_out ? -0.0 : +0.0; + } else if (exp2 - exp1 < -1075) { + vdst[lane] = src0[lane]; + } else if (exp1 == 2047) { + vdst[lane] = src0[lane]; + } else { + vdst[lane] = sign_out ? 
-std::fabs(src0[lane]) + : std::fabs(src0[lane]); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_DIV_SCALE_F32 class methods --- + + Inst_VOP3__V_DIV_SCALE_F32::Inst_VOP3__V_DIV_SCALE_F32( + InFmt_VOP3B *iFmt) + : Inst_VOP3B(iFmt, "v_div_scale_f32") + { + setFlag(ALU); + setFlag(WritesVCC); + setFlag(F32); + } // Inst_VOP3__V_DIV_SCALE_F32 + + Inst_VOP3__V_DIV_SCALE_F32::~Inst_VOP3__V_DIV_SCALE_F32() + { + } // ~Inst_VOP3__V_DIV_SCALE_F32 + + // --- description from .arch file --- + // {vcc,D.f} = Divide preop and flags -- s0.f = Quotient, s1.f = + // Denominator, s2.f = Numerator -- s0 must equal s1 or s2. Given a + // numerator and denominator, this opcode will appropriately scale inputs + // for division to avoid subnormal terms during Newton-Raphson correction + // algorithm. This opcode producses a VCC flag for post-scale of quotient. + void + Inst_VOP3__V_DIV_SCALE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); + ScalarOperandU64 vcc(gpuDynInst, instData.SDST); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + if (extData.NEG & 0x4) { + src2.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane]; + vcc.setBit(lane, 0); + } + } + + vcc.write(); + vdst.write(); + } // execute + // --- Inst_VOP3__V_DIV_SCALE_F64 class methods --- + + Inst_VOP3__V_DIV_SCALE_F64::Inst_VOP3__V_DIV_SCALE_F64( + InFmt_VOP3B *iFmt) + : Inst_VOP3B(iFmt, "v_div_scale_f64") + { + setFlag(ALU); + setFlag(WritesVCC); + setFlag(F64); + } // Inst_VOP3__V_DIV_SCALE_F64 + + Inst_VOP3__V_DIV_SCALE_F64::~Inst_VOP3__V_DIV_SCALE_F64() + { + 
} // ~Inst_VOP3__V_DIV_SCALE_F64 + + // --- description from .arch file --- + // {vcc,D.d} = Divide preop and flags -- s0.d = Quotient, s1.d = + // Denominator, s2.d = Numerator -- s0 must equal s1 or s2. Given a + // numerator and denominator, this opcode will appropriately scale inputs + // for division to avoid subnormal terms during Newton-Raphson correction + // algorithm. This opcode producses a VCC flag for post-scale of quotient. + void + Inst_VOP3__V_DIV_SCALE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ConstVecOperandF64 src2(gpuDynInst, extData.SRC2); + ScalarOperandU64 vcc(gpuDynInst, instData.SDST); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + if (extData.NEG & 0x4) { + src2.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int exp1(0); + int exp2(0); + std::frexp(src1[lane], &exp1); + std::frexp(src2[lane], &exp2); + vcc.setBit(lane, 0); + + if (std::fpclassify(src1[lane]) == FP_ZERO + || std::fpclassify(src2[lane]) == FP_ZERO) { + vdst[lane] = NAN; + } else if (exp2 - exp1 >= 768) { + vcc.setBit(lane, 1); + if (src0[lane] == src1[lane]) { + vdst[lane] = std::ldexp(src0[lane], 128); + } + } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL) { + vdst[lane] = std::ldexp(src0[lane], 128); + } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL + && std::fpclassify(src2[lane] / src1[lane]) + == FP_SUBNORMAL) { + vcc.setBit(lane, 1); + if (src0[lane] == src1[lane]) { + vdst[lane] = std::ldexp(src0[lane], 128); + } + } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL) { + vdst[lane] = std::ldexp(src0[lane], -128); + } else if (std::fpclassify(src2[lane] / 
src1[lane]) + == FP_SUBNORMAL) { + vcc.setBit(lane, 1); + if (src0[lane] == src2[lane]) { + vdst[lane] = std::ldexp(src0[lane], 128); + } + } else if (exp2 <= 53) { + vdst[lane] = std::ldexp(src0[lane], 128); + } + } + } + + vcc.write(); + vdst.write(); + } // execute + // --- Inst_VOP3__V_DIV_FMAS_F32 class methods --- + + Inst_VOP3__V_DIV_FMAS_F32::Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_div_fmas_f32", false) + { + setFlag(ALU); + setFlag(ReadsVCC); + setFlag(F32); + setFlag(FMA); + } // Inst_VOP3__V_DIV_FMAS_F32 + + Inst_VOP3__V_DIV_FMAS_F32::~Inst_VOP3__V_DIV_FMAS_F32() + { + } // ~Inst_VOP3__V_DIV_FMAS_F32 + + // --- description from .arch file --- + // D.f = Special case divide FMA with scale and flags(s0.f = Quotient, + // s1.f = Denominator, s2.f = Numerator) + void + Inst_VOP3__V_DIV_FMAS_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandF32 src2(gpuDynInst, extData.SRC2); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (instData.ABS & 0x4) { + src2.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + if (extData.NEG & 0x4) { + src2.negModifier(); + } + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]); + } + } + + //vdst.write(); + } // execute + // --- Inst_VOP3__V_DIV_FMAS_F64 class methods --- + + Inst_VOP3__V_DIV_FMAS_F64::Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_div_fmas_f64", false) + { + setFlag(ALU); + setFlag(ReadsVCC); + setFlag(F64); + setFlag(FMA); + } // Inst_VOP3__V_DIV_FMAS_F64 + + 
Inst_VOP3__V_DIV_FMAS_F64::~Inst_VOP3__V_DIV_FMAS_F64()
{
} // ~Inst_VOP3__V_DIV_FMAS_F64

// --- description from .arch file ---
// D.d = Special case divide FMA with scale and flags(s0.d = Quotient,
// s1.d = Denominator, s2.d = Numerator)
void
Inst_VOP3__V_DIV_FMAS_F64::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
    ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
    VecOperandF64 vdst(gpuDynInst, instData.VDST);
    // VCC carries the per-lane "post-scale needed" flags produced by the
    // matching V_DIV_SCALE_F64.
    ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

    src0.readSrc();
    src1.readSrc();
    src2.readSrc();
    vcc.read();

    // FP input modifiers: abs is applied before neg for each source.
    if (instData.ABS & 0x1) {
        src0.absModifier();
    }

    if (instData.ABS & 0x2) {
        src1.absModifier();
    }

    if (instData.ABS & 0x4) {
        src2.absModifier();
    }

    if (extData.NEG & 0x1) {
        src0.negModifier();
    }

    if (extData.NEG & 0x2) {
        src1.negModifier();
    }

    if (extData.NEG & 0x4) {
        src2.negModifier();
    }

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            if (bits(vcc.rawData(), lane)) {
                // Lane flagged by div_scale: undo the pre-scale by
                // multiplying the fused result by 2^64.
                vdst[lane] = std::pow(2, 64)
                    * std::fma(src0[lane], src1[lane], src2[lane]);
            } else {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }
    }

    vdst.write();
} // execute
// --- Inst_VOP3__V_MSAD_U8 class methods ---

Inst_VOP3__V_MSAD_U8::Inst_VOP3__V_MSAD_U8(InFmt_VOP3A *iFmt)
    : Inst_VOP3A(iFmt, "v_msad_u8", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_MSAD_U8

Inst_VOP3__V_MSAD_U8::~Inst_VOP3__V_MSAD_U8()
{
} // ~Inst_VOP3__V_MSAD_U8

// --- description from .arch file ---
// D.u = Masked Byte SAD with accum_lo(S0.u, S1.u, S2.u).
// Unimplemented: the simulator panics if a kernel executes this opcode.
void
Inst_VOP3__V_MSAD_U8::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP3__V_QSAD_PK_U16_U8 class methods ---

Inst_VOP3__V_QSAD_PK_U16_U8::Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3A *iFmt)
    : Inst_VOP3A(iFmt, "v_qsad_pk_u16_u8", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_QSAD_PK_U16_U8

Inst_VOP3__V_QSAD_PK_U16_U8::~Inst_VOP3__V_QSAD_PK_U16_U8()
{
} // ~Inst_VOP3__V_QSAD_PK_U16_U8

// --- description from .arch file ---
// D.u = Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
// S1.u[31:0], S2.u[63:0])
// Unimplemented: the simulator panics if a kernel executes this opcode.
void
Inst_VOP3__V_QSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP3__V_MQSAD_PK_U16_U8 class methods ---

Inst_VOP3__V_MQSAD_PK_U16_U8::Inst_VOP3__V_MQSAD_PK_U16_U8(
    InFmt_VOP3A *iFmt)
    : Inst_VOP3A(iFmt, "v_mqsad_pk_u16_u8", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_MQSAD_PK_U16_U8

Inst_VOP3__V_MQSAD_PK_U16_U8::~Inst_VOP3__V_MQSAD_PK_U16_U8()
{
} // ~Inst_VOP3__V_MQSAD_PK_U16_U8

// --- description from .arch file ---
// D.u = Masked Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
// --- S1.u[31:0], S2.u[63:0])
// Unimplemented: the simulator panics if a kernel executes this opcode.
void
Inst_VOP3__V_MQSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP3__V_MQSAD_U32_U8 class methods ---

Inst_VOP3__V_MQSAD_U32_U8::Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3A *iFmt)
    : Inst_VOP3A(iFmt, "v_mqsad_u32_u8", false)
{
    setFlag(ALU);
} // Inst_VOP3__V_MQSAD_U32_U8

Inst_VOP3__V_MQSAD_U32_U8::~Inst_VOP3__V_MQSAD_U32_U8()
{
} // ~Inst_VOP3__V_MQSAD_U32_U8

// --- description from .arch file ---
// D.u128 = Masked Quad-Byte SAD with 32-bit accum_lo/hi(S0.u[63:0],
// --- S1.u[31:0], S2.u[127:0])
// Unimplemented: the simulator panics if a kernel executes this opcode.
void
Inst_VOP3__V_MQSAD_U32_U8::execute(GPUDynInstPtr gpuDynInst)
{
    panicUnimplemented();
} // execute
// --- Inst_VOP3__V_MAD_U64_U32 class methods ---

Inst_VOP3__V_MAD_U64_U32::Inst_VOP3__V_MAD_U64_U32(
    InFmt_VOP3B *iFmt)
    : Inst_VOP3B(iFmt, "v_mad_u64_u32")
{
    setFlag(ALU);
    setFlag(WritesVCC);
    setFlag(MAD);
} // Inst_VOP3__V_MAD_U64_U32

Inst_VOP3__V_MAD_U64_U32::~Inst_VOP3__V_MAD_U64_U32()
{
} // ~Inst_VOP3__V_MAD_U64_U32

// --- description from .arch file ---
// {vcc_out,D.u64} = S0.u32 * S1.u32 + S2.u64.
// 32x32 -> 64-bit multiply-add; the carry out of the 64-bit add goes to
// the per-lane VCC bit.
void
Inst_VOP3__V_MAD_U64_U32::execute(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
    ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
    ConstVecOperandU64 src2(gpuDynInst, extData.SRC2);
    ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
    VecOperandU64 vdst(gpuDynInst, instData.VDST);

    src0.readSrc();
    src1.readSrc();
    src2.readSrc();
    // Pre-read the destination so lanes masked off by EXEC keep their
    // previous register contents when vdst is written back below.
    vdst.read();

    /**
     * input modifiers are supported by FP operations only
     * (VOP3B has no ABS field, so only NEG is checked here)
     */
    assert(!(extData.NEG & 0x1));
    assert(!(extData.NEG & 0x2));
    assert(!(extData.NEG & 0x4));

    for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
        if (wf->execMask(lane)) {
            // muladd() stores src0*src1+src2 into vdst[lane] and returns
            // the carry, which becomes the lane's VCC bit.
            vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
                src2[lane]));
        }
    }

    vcc.write();
    vdst.write();
} // execute
// --- Inst_VOP3__V_MAD_I64_I32 class methods ---

Inst_VOP3__V_MAD_I64_I32::Inst_VOP3__V_MAD_I64_I32(
    InFmt_VOP3B *iFmt)
    : Inst_VOP3B(iFmt, "v_mad_i64_i32")
{
    setFlag(ALU);
    setFlag(WritesVCC);
    setFlag(MAD);
} // Inst_VOP3__V_MAD_I64_I32

Inst_VOP3__V_MAD_I64_I32::~Inst_VOP3__V_MAD_I64_I32()
{
} // ~Inst_VOP3__V_MAD_I64_I32

// --- description from .arch file ---
// {vcc_out,D.i64} = S0.i32 * S1.i32 + S2.i64.
+ void + Inst_VOP3__V_MAD_I64_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandI64 src2(gpuDynInst, extData.SRC2); + ScalarOperandU64 vcc(gpuDynInst, instData.SDST); + VecOperandI64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane], + src2[lane])); + } + } + + vcc.write(); + vdst.write(); + } // execute + // --- Inst_VOP3__V_XAD_U32 class methods --- + + Inst_VOP3__V_XAD_U32::Inst_VOP3__V_XAD_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_xad_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_XAD_U32 + + Inst_VOP3__V_XAD_U32::~Inst_VOP3__V_XAD_U32() + { + } // ~Inst_VOP3__V_XAD_U32 + + // --- description from .arch file --- + // D.u32 = (S0.u32 ^ S1.u32) + S2.u32. 
+ void + Inst_VOP3__V_XAD_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (src0[lane] ^ src1[lane]) + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LSHL_ADD_U32 class methods --- + + Inst_VOP3__V_LSHL_ADD_U32::Inst_VOP3__V_LSHL_ADD_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshl_add_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHL_ADD_U32 + + Inst_VOP3__V_LSHL_ADD_U32::~Inst_VOP3__V_LSHL_ADD_U32() + { + } // ~Inst_VOP3__V_LSHL_ADD_U32 + + // --- description from .arch file --- + // D.u = (S0.u << S1.u[4:0]) + S2.u. 
+ void + Inst_VOP3__V_LSHL_ADD_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (src0[lane] << bits(src1[lane], 4, 0)) + + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_ADD_LSHL_U32 class methods --- + + Inst_VOP3__V_ADD_LSHL_U32::Inst_VOP3__V_ADD_LSHL_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_add_lshl_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_ADD_LSHL_U32 + + Inst_VOP3__V_ADD_LSHL_U32::~Inst_VOP3__V_ADD_LSHL_U32() + { + } // ~Inst_VOP3__V_ADD_LSHL_U32 + + // --- description from .arch file --- + // D.u = (S0.u + S1.u) << S2.u[4:0]. 
+ void + Inst_VOP3__V_ADD_LSHL_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = + (src0[lane] + src1[lane]) << bits(src2[lane], 4, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_ADD3_U32 class methods --- + + Inst_VOP3__V_ADD3_U32::Inst_VOP3__V_ADD3_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_add3_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_ADD3_U32 + + Inst_VOP3__V_ADD3_U32::~Inst_VOP3__V_ADD3_U32() + { + } // ~Inst_VOP3__V_ADD3_U32 + + // --- description from .arch file --- + // D.u = S0.u + S1.u + S2.u. 
+ void + Inst_VOP3__V_ADD3_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] + src1[lane] + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LSHL_OR_B32 class methods --- + + Inst_VOP3__V_LSHL_OR_B32::Inst_VOP3__V_LSHL_OR_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshl_or_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHL_OR_B32 + + Inst_VOP3__V_LSHL_OR_B32::~Inst_VOP3__V_LSHL_OR_B32() + { + } // ~Inst_VOP3__V_LSHL_OR_B32 + + // --- description from .arch file --- + // D.u = (S0.u << S1.u[4:0]) | S2.u. 
+ void + Inst_VOP3__V_LSHL_OR_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (src0[lane] << bits(src1[lane], 4, 0)) + | src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_AND_OR_B32 class methods --- + + Inst_VOP3__V_AND_OR_B32::Inst_VOP3__V_AND_OR_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_and_or_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_AND_OR_B32 + + Inst_VOP3__V_AND_OR_B32::~Inst_VOP3__V_AND_OR_B32() + { + } // ~Inst_VOP3__V_AND_OR_B32 + + // --- description from .arch file --- + // D.u = (S0.u & S1.u) | S2.u. + // Input and output modifiers not supported. 
+ void + Inst_VOP3__V_AND_OR_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = (src0[lane] & src1[lane]) | src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAD_F16 class methods --- + + Inst_VOP3__V_MAD_F16::Inst_VOP3__V_MAD_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mad_f16", false) + { + setFlag(ALU); + setFlag(F16); + setFlag(MAD); + } // Inst_VOP3__V_MAD_F16 + + Inst_VOP3__V_MAD_F16::~Inst_VOP3__V_MAD_F16() + { + } // ~Inst_VOP3__V_MAD_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * S1.f16 + S2.f16. + // Supports round mode, exception flags, saturation. + void + Inst_VOP3__V_MAD_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_MAD_U16 class methods --- + + Inst_VOP3__V_MAD_U16::Inst_VOP3__V_MAD_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mad_u16", false) + { + setFlag(ALU); + setFlag(MAD); + } // Inst_VOP3__V_MAD_U16 + + Inst_VOP3__V_MAD_U16::~Inst_VOP3__V_MAD_U16() + { + } // ~Inst_VOP3__V_MAD_U16 + + // --- description from .arch file --- + // D.u16 = S0.u16 * S1.u16 + S2.u16. + // Supports saturation (unsigned 16-bit integer domain). 
+ void + Inst_VOP3__V_MAD_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU16 src2(gpuDynInst, extData.SRC2); + VecOperandU16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] * src1[lane] + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAD_I16 class methods --- + + Inst_VOP3__V_MAD_I16::Inst_VOP3__V_MAD_I16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mad_i16", false) + { + setFlag(ALU); + setFlag(MAD); + } // Inst_VOP3__V_MAD_I16 + + Inst_VOP3__V_MAD_I16::~Inst_VOP3__V_MAD_I16() + { + } // ~Inst_VOP3__V_MAD_I16 + + // --- description from .arch file --- + // D.i16 = S0.i16 * S1.i16 + S2.i16. + // Supports saturation (signed 16-bit integer domain). 
+ void + Inst_VOP3__V_MAD_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ConstVecOperandI16 src2(gpuDynInst, extData.SRC2); + VecOperandI16 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src0[lane] * src1[lane] + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_PERM_B32 class methods --- + + Inst_VOP3__V_PERM_B32::Inst_VOP3__V_PERM_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_perm_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_PERM_B32 + + Inst_VOP3__V_PERM_B32::~Inst_VOP3__V_PERM_B32() + { + } // ~Inst_VOP3__V_PERM_B32 + + // --- description from .arch file --- + // D.u[31:24] = permute({S0.u, S1.u}, S2.u[31:24]); + // D.u[23:16] = permute({S0.u, S1.u}, S2.u[23:16]); + // D.u[15:8] = permute({S0.u, S1.u}, S2.u[15:8]); + // D.u[7:0] = permute({S0.u, S1.u}, S2.u[7:0]); + // byte permute(byte in[8], byte sel) { + // if (sel>=13) then return 0xff; + // elsif(sel==12) then return 0x00; + // elsif(sel==11) then return in[7][7] * 0xff; + // elsif(sel==10) then return in[5][7] * 0xff; + // elsif(sel==9) then return in[3][7] * 0xff; + // elsif(sel==8) then return in[1][7] * 0xff; + // else return in[sel]; + // } + // Byte permute. 
+ void + Inst_VOP3__V_PERM_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU32 src2(gpuDynInst, extData.SRC2); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemU64 selector = (VecElemU64)src0[lane]; + selector = (selector << 32) | (VecElemU64)src1[lane]; + vdst[lane] = 0; + + DPRINTF(VEGA, "Executing v_perm_b32 src_0 0x%08x, src_1 " + "0x%08x, src_2 0x%08x, vdst 0x%08x\n", src0[lane], + src1[lane], src2[lane], vdst[lane]); + DPRINTF(VEGA, "Selector: 0x%08x \n", selector); + + for (int i = 0; i < 4 ; ++i) { + VecElemU32 permuted_val = permute(selector, 0xFF + & ((VecElemU32)src2[lane] >> (8 * i))); + vdst[lane] |= (permuted_val << (8 * i)); + } + + DPRINTF(VEGA, "v_perm result: 0x%08x\n", vdst[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_FMA_F16 class methods --- + + Inst_VOP3__V_FMA_F16::Inst_VOP3__V_FMA_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_fma_f16", false) + { + setFlag(ALU); + setFlag(F16); + setFlag(FMA); + } // Inst_VOP3__V_FMA_F16 + + Inst_VOP3__V_FMA_F16::~Inst_VOP3__V_FMA_F16() + { + } // ~Inst_VOP3__V_FMA_F16 + + // --- description from .arch file --- + // D.f16 = S0.f16 * S1.f16 + S2.f16. + // Fused half precision multiply add. 
+ void + Inst_VOP3__V_FMA_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_DIV_FIXUP_F16 class methods --- + + Inst_VOP3__V_DIV_FIXUP_F16::Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_div_fixup_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_DIV_FIXUP_F16 + + Inst_VOP3__V_DIV_FIXUP_F16::~Inst_VOP3__V_DIV_FIXUP_F16() + { + } // ~Inst_VOP3__V_DIV_FIXUP_F16 + + // --- description from .arch file --- + // sign_out = sign(S1.f16)^sign(S2.f16); + // if (S2.f16 == NAN) + // D.f16 = Quiet(S2.f16); + // else if (S1.f16 == NAN) + // D.f16 = Quiet(S1.f16); + // else if (S1.f16 == S2.f16 == 0) + // # 0/0 + // D.f16 = pele_nan(0xfe00); + // else if (abs(S1.f16) == abs(S2.f16) == +-INF) + // # inf/inf + // D.f16 = pele_nan(0xfe00); + // else if (S1.f16 ==0 || abs(S2.f16) == +-INF) + // # x/0, or inf/y + // D.f16 = sign_out ? -INF : INF; + // else if (abs(S1.f16) == +-INF || S2.f16 == 0) + // # x/inf, 0/y + // D.f16 = sign_out ? -0 : 0; + // else if ((exp(S2.f16) - exp(S1.f16)) < -150) + // D.f16 = sign_out ? -underflow : underflow; + // else if (exp(S1.f16) == 255) + // D.f16 = sign_out ? -overflow : overflow; + // else + // D.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16). + // Half precision division fixup. + // S0 = Quotient, S1 = Denominator, S3 = Numerator. + // Given a numerator, denominator, and quotient from a divide, this opcode + // will detect and apply special case numerics, touching up the quotient if + // necessary. This opcode also generates invalid, denorm and divide by + // zero exceptions caused by the division. 
+ void + Inst_VOP3__V_DIV_FIXUP_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_LSHL_ADD_U64 class methods --- + + Inst_VOP3__V_LSHL_ADD_U64::Inst_VOP3__V_LSHL_ADD_U64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshl_add_u64", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHL_ADD_U64 + + Inst_VOP3__V_LSHL_ADD_U64::~Inst_VOP3__V_LSHL_ADD_U64() + { + } // ~Inst_VOP3__V_LSHL_ADD_U64 + + // --- description from .arch file --- + // D.u = (S0.u << S1.u[4:0]) + S2.u. + void + Inst_VOP3__V_LSHL_ADD_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU64 src2(gpuDynInst, extData.SRC2); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + int shift_amount = bits(src1[lane], 2, 0); + shift_amount = shift_amount > 4 ? 
0 : shift_amount; + vdst[lane] = (src0[lane] << shift_amount) + + src2[lane]; + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_PKACCUM_U8_F32 class methods --- + + Inst_VOP3__V_CVT_PKACCUM_U8_F32::Inst_VOP3__V_CVT_PKACCUM_U8_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_pkaccum_u8_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_PKACCUM_U8_F32 + + Inst_VOP3__V_CVT_PKACCUM_U8_F32::~Inst_VOP3__V_CVT_PKACCUM_U8_F32() + { + } // ~Inst_VOP3__V_CVT_PKACCUM_U8_F32 + + // --- description from .arch file --- + // byte = S1.u[1:0]; bit = byte * 8; + // D.u[bit+7:bit] = flt32_to_uint8(S0.f); + // Pack converted value of S0.f into byte S1 of the destination. + // SQ translates to V_CVT_PK_U8_F32. + // Note: this opcode uses src_c to pass destination in as a source. + void + Inst_VOP3__V_CVT_PKACCUM_U8_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_INTERP_P1_F32 class methods --- + + Inst_VOP3__V_INTERP_P1_F32::Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_interp_p1_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_INTERP_P1_F32 + + Inst_VOP3__V_INTERP_P1_F32::~Inst_VOP3__V_INTERP_P1_F32() + { + } // ~Inst_VOP3__V_INTERP_P1_F32 + + // --- description from .arch file --- + // D.f = P10 * S.f + P0; parameter interpolation (SQ translates to + // V_MAD_F32 for SP). + // CAUTION: when in HALF_LDS mode, D must not be the same GPR as S; if + // D == S then data corruption will occur. + // NOTE: In textual representations the I/J VGPR is the first source and + // the attribute is the second source; however in the VOP3 encoding the + // attribute is stored in the src0 field and the VGPR is stored in the + // src1 field. 
+ void + Inst_VOP3__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_INTERP_P2_F32 class methods --- + + Inst_VOP3__V_INTERP_P2_F32::Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_interp_p2_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_INTERP_P2_F32 + + Inst_VOP3__V_INTERP_P2_F32::~Inst_VOP3__V_INTERP_P2_F32() + { + } // ~Inst_VOP3__V_INTERP_P2_F32 + + // --- description from .arch file --- + // D.f = P20 * S.f + D.f; parameter interpolation (SQ translates to + // V_MAD_F32 for SP). + // NOTE: In textual representations the I/J VGPR is the first source and + // the attribute is the second source; however in the VOP3 encoding the + // attribute is stored in the src0 field and the VGPR is stored in the + // src1 field. + void + Inst_VOP3__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_INTERP_MOV_F32 class methods --- + + Inst_VOP3__V_INTERP_MOV_F32::Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_interp_mov_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_INTERP_MOV_F32 + + Inst_VOP3__V_INTERP_MOV_F32::~Inst_VOP3__V_INTERP_MOV_F32() + { + } // ~Inst_VOP3__V_INTERP_MOV_F32 + + // --- description from .arch file --- + // D.f = {P10,P20,P0}[S.u]; parameter load. + void + Inst_VOP3__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_INTERP_P1LL_F16 class methods --- + + Inst_VOP3__V_INTERP_P1LL_F16::Inst_VOP3__V_INTERP_P1LL_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_interp_p1ll_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_INTERP_P1LL_F16 + + Inst_VOP3__V_INTERP_P1LL_F16::~Inst_VOP3__V_INTERP_P1LL_F16() + { + } // ~Inst_VOP3__V_INTERP_P1LL_F16 + + // --- description from .arch file --- + // D.f32 = P10.f16 * S0.f32 + P0.f16. 
+ // 'LL' stands for 'two LDS arguments'. + // attr_word selects the high or low half 16 bits of each LDS dword + // accessed. + // This opcode is available for 32-bank LDS only. + // NOTE: In textual representations the I/J VGPR is the first source and + // the attribute is the second source; however in the VOP3 encoding the + // attribute is stored in the src0 field and the VGPR is stored in the + // src1 field. + void + Inst_VOP3__V_INTERP_P1LL_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_INTERP_P1LV_F16 class methods --- + + Inst_VOP3__V_INTERP_P1LV_F16::Inst_VOP3__V_INTERP_P1LV_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_interp_p1lv_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_INTERP_P1LV_F16 + + Inst_VOP3__V_INTERP_P1LV_F16::~Inst_VOP3__V_INTERP_P1LV_F16() + { + } // ~Inst_VOP3__V_INTERP_P1LV_F16 + + // --- description from .arch file --- + // D.f32 = P10.f16 * S0.f32 + (S2.u32 >> (attr_word * 16)).f16. + // 'LV' stands for 'One LDS and one VGPR argument'. + // S2 holds two parameters, attr_word selects the high or low word of the + // VGPR for this calculation, as well as the high or low half of the LDS + // data. + // Meant for use with 16-bank LDS. + // NOTE: In textual representations the I/J VGPR is the first source and + // the attribute is the second source; however in the VOP3 encoding the + // attribute is stored in the src0 field and the VGPR is stored in the + // src1 field. 
+ void + Inst_VOP3__V_INTERP_P1LV_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_INTERP_P2_F16 class methods --- + + Inst_VOP3__V_INTERP_P2_F16::Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_interp_p2_f16", false) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_INTERP_P2_F16 + + Inst_VOP3__V_INTERP_P2_F16::~Inst_VOP3__V_INTERP_P2_F16() + { + } // ~Inst_VOP3__V_INTERP_P2_F16 + + // --- description from .arch file --- + // D.f16 = P20.f16 * S0.f32 + S2.f32. + // Final computation. attr_word selects LDS high or low 16bits. Used for + // both 16- and 32-bank LDS. + // Result is always written to the 16 LSBs of the destination VGPR. + // NOTE: In textual representations the I/J VGPR is the first source and + // the attribute is the second source; however in the VOP3 encoding the + // attribute is stored in the src0 field and the VGPR is stored in the + // src1 field. + void + Inst_VOP3__V_INTERP_P2_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_ADD_F64 class methods --- + + Inst_VOP3__V_ADD_F64::Inst_VOP3__V_ADD_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_add_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_ADD_F64 + + Inst_VOP3__V_ADD_F64::~Inst_VOP3__V_ADD_F64() + { + } // ~Inst_VOP3__V_ADD_F64 + + // --- description from .arch file --- + // D.d = S0.d + S1.d. 
+ void + Inst_VOP3__V_ADD_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isnan(src0[lane]) || + std::isnan(src1[lane]) ) { + vdst[lane] = NAN; + } else if (std::isinf(src0[lane]) && + std::isinf(src1[lane])) { + if (std::signbit(src0[lane]) != + std::signbit(src1[lane])) { + vdst[lane] = NAN; + } else { + vdst[lane] = src0[lane]; + } + } else if (std::isinf(src0[lane])) { + vdst[lane] = src0[lane]; + } else if (std::isinf(src1[lane])) { + vdst[lane] = src1[lane]; + } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL || + std::fpclassify(src0[lane]) == FP_ZERO) { + if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || + std::fpclassify(src1[lane]) == FP_ZERO) { + if (std::signbit(src0[lane]) && + std::signbit(src1[lane])) { + vdst[lane] = -0.0; + } else { + vdst[lane] = 0.0; + } + } else { + vdst[lane] = src1[lane]; + } + } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || + std::fpclassify(src1[lane]) == FP_ZERO) { + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL || + std::fpclassify(src0[lane]) == FP_ZERO) { + if (std::signbit(src0[lane]) && + std::signbit(src1[lane])) { + vdst[lane] = -0.0; + } else { + vdst[lane] = 0.0; + } + } else { + vdst[lane] = src0[lane]; + } + } else { + vdst[lane] = src0[lane] + src1[lane]; + } + } + } + + vdst.write(); + } // execute + // --- 
Inst_VOP3__V_MUL_F64 class methods --- + + Inst_VOP3__V_MUL_F64::Inst_VOP3__V_MUL_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_MUL_F64 + + Inst_VOP3__V_MUL_F64::~Inst_VOP3__V_MUL_F64() + { + } // ~Inst_VOP3__V_MUL_F64 + + // --- description from .arch file --- + // D.d = S0.d * S1.d. + void + Inst_VOP3__V_MUL_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isnan(src0[lane]) || + std::isnan(src1[lane])) { + vdst[lane] = NAN; + } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || + std::fpclassify(src0[lane]) == FP_ZERO) && + !std::signbit(src0[lane])) { + if (std::isinf(src1[lane])) { + vdst[lane] = NAN; + } else if (!std::signbit(src1[lane])) { + vdst[lane] = +0.0; + } else { + vdst[lane] = -0.0; + } + } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL || + std::fpclassify(src0[lane]) == FP_ZERO) && + std::signbit(src0[lane])) { + if (std::isinf(src1[lane])) { + vdst[lane] = NAN; + } else if (std::signbit(src1[lane])) { + vdst[lane] = +0.0; + } else { + vdst[lane] = -0.0; + } + } else if (std::isinf(src0[lane]) && + !std::signbit(src0[lane])) { + if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || + std::fpclassify(src1[lane]) == FP_ZERO) { + vdst[lane] = NAN; + } else if (!std::signbit(src1[lane])) { + 
vdst[lane] = +INFINITY; + } else { + vdst[lane] = -INFINITY; + } + } else if (std::isinf(src0[lane]) && + std::signbit(src0[lane])) { + if (std::fpclassify(src1[lane]) == FP_SUBNORMAL || + std::fpclassify(src1[lane]) == FP_ZERO) { + vdst[lane] = NAN; + } else if (std::signbit(src1[lane])) { + vdst[lane] = +INFINITY; + } else { + vdst[lane] = -INFINITY; + } + } else { + vdst[lane] = src0[lane] * src1[lane]; + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MIN_F64 class methods --- + + Inst_VOP3__V_MIN_F64::Inst_VOP3__V_MIN_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_min_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_MIN_F64 + + Inst_VOP3__V_MIN_F64::~Inst_VOP3__V_MIN_F64() + { + } // ~Inst_VOP3__V_MIN_F64 + + // --- description from .arch file --- + // D.d = min(S0.d, S1.d). + void + Inst_VOP3__V_MIN_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fmin(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MAX_F64 class methods --- + + Inst_VOP3__V_MAX_F64::Inst_VOP3__V_MAX_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_max_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_MAX_F64 + + Inst_VOP3__V_MAX_F64::~Inst_VOP3__V_MAX_F64() + { + } // ~Inst_VOP3__V_MAX_F64 + + // --- description from 
.arch file --- + // D.d = max(S0.d, S1.d). + void + Inst_VOP3__V_MAX_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::fmax(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LDEXP_F64 class methods --- + + Inst_VOP3__V_LDEXP_F64::Inst_VOP3__V_LDEXP_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ldexp_f64", false) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_LDEXP_F64 + + Inst_VOP3__V_LDEXP_F64::~Inst_VOP3__V_LDEXP_F64() + { + } // ~Inst_VOP3__V_LDEXP_F64 + + // --- description from .arch file --- + // D.d = pow(S0.d, S1.i[31:0]). 
+ void + Inst_VOP3__V_LDEXP_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandF64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (std::isnan(src0[lane]) || std::isinf(src0[lane])) { + vdst[lane] = src0[lane]; + } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + || std::fpclassify(src0[lane]) == FP_ZERO) { + if (std::signbit(src0[lane])) { + vdst[lane] = -0.0; + } else { + vdst[lane] = +0.0; + } + } else { + vdst[lane] = std::ldexp(src0[lane], src1[lane]); + } + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_LO_U32 class methods --- + + Inst_VOP3__V_MUL_LO_U32::Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_lo_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MUL_LO_U32 + + Inst_VOP3__V_MUL_LO_U32::~Inst_VOP3__V_MUL_LO_U32() + { + } // ~Inst_VOP3__V_MUL_LO_U32 + + // --- description from .arch file --- + // D.u = S0.u * S1.u. 
+ void + Inst_VOP3__V_MUL_LO_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemI64 s0 = (VecElemI64)src0[lane]; + VecElemI64 s1 = (VecElemI64)src1[lane]; + vdst[lane] = (VecElemU32)((s0 * s1) & 0xffffffffLL); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_HI_U32 class methods --- + + Inst_VOP3__V_MUL_HI_U32::Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_hi_u32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MUL_HI_U32 + + Inst_VOP3__V_MUL_HI_U32::~Inst_VOP3__V_MUL_HI_U32() + { + } // ~Inst_VOP3__V_MUL_HI_U32 + + // --- description from .arch file --- + // D.u = (S0.u * S1.u) >> 32. 
+ void + Inst_VOP3__V_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemI64 s0 = (VecElemI64)src0[lane]; + VecElemI64 s1 = (VecElemI64)src1[lane]; + vdst[lane] + = (VecElemU32)(((s0 * s1) >> 32) & 0xffffffffLL); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_MUL_HI_I32 class methods --- + + Inst_VOP3__V_MUL_HI_I32::Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_mul_hi_i32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_MUL_HI_I32 + + Inst_VOP3__V_MUL_HI_I32::~Inst_VOP3__V_MUL_HI_I32() + { + } // ~Inst_VOP3__V_MUL_HI_I32 + + // --- description from .arch file --- + // D.i = (S0.i * S1.i) >> 32. 
+ void + Inst_VOP3__V_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + VecOperandI32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + VecElemI64 s0 = (VecElemI64)src0[lane]; + VecElemI64 s1 = (VecElemI64)src1[lane]; + vdst[lane] + = (VecElemI32)(((s0 * s1) >> 32LL) & 0xffffffffLL); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LDEXP_F32 class methods --- + + Inst_VOP3__V_LDEXP_F32::Inst_VOP3__V_LDEXP_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ldexp_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_LDEXP_F32 + + Inst_VOP3__V_LDEXP_F32::~Inst_VOP3__V_LDEXP_F32() + { + } // ~Inst_VOP3__V_LDEXP_F32 + + // --- description from .arch file --- + // D.f = pow(S0.f, S1.i) + void + Inst_VOP3__V_LDEXP_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + VecOperandF32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = std::ldexp(src0[lane], src1[lane]); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_READLANE_B32 class methods --- + + 
Inst_VOP3__V_READLANE_B32::Inst_VOP3__V_READLANE_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_readlane_b32", true) + { + setFlag(ALU); + setFlag(IgnoreExec); + } // Inst_VOP3__V_READLANE_B32 + + Inst_VOP3__V_READLANE_B32::~Inst_VOP3__V_READLANE_B32() + { + } // ~Inst_VOP3__V_READLANE_B32 + + // --- description from .arch file --- + // Copy one VGPR value to one SGPR. D = SGPR-dest, S0 = Source Data (VGPR# + // or M0(lds-direct)), S1 = Lane Select (SGPR or M0). Ignores exec mask. + // Input and output modifiers not supported; this is an untyped operation. + void + Inst_VOP3__V_READLANE_B32::execute(GPUDynInstPtr gpuDynInst) + { + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU32 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.read(); + + sdst = src0[src1.rawData() & 0x3f]; + + sdst.write(); + } // execute + // --- Inst_VOP3__V_WRITELANE_B32 class methods --- + + Inst_VOP3__V_WRITELANE_B32::Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_writelane_b32", false) + { + setFlag(ALU); + setFlag(IgnoreExec); + } // Inst_VOP3__V_WRITELANE_B32 + + Inst_VOP3__V_WRITELANE_B32::~Inst_VOP3__V_WRITELANE_B32() + { + } // ~Inst_VOP3__V_WRITELANE_B32 + + // --- description from .arch file --- + // Write value into one VGPR in one lane. D = VGPR-dest, S0 = Source Data + // (sgpr, m0, exec or constants), S1 = Lane Select (SGPR or M0). Ignores + // exec mask. + // Input and output modifiers not supported; this is an untyped operation. + // SQ translates to V_MOV_B32. 
    void
    Inst_VOP3__V_WRITELANE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.read();
        src1.read();
        // Read the destination first so the 63 untouched lanes keep their
        // current values when the register is written back.
        vdst.read();

        // Only the selected lane (modulo the 64-lane wavefront) is updated.
        vdst[src1.rawData() & 0x3f] = src0.rawData();

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_BCNT_U32_B32 class methods ---

    Inst_VOP3__V_BCNT_U32_B32::Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_bcnt_u32_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BCNT_U32_B32

    Inst_VOP3__V_BCNT_U32_B32::~Inst_VOP3__V_BCNT_U32_B32()
    {
    } // ~Inst_VOP3__V_BCNT_U32_B32

    // --- description from .arch file ---
    // D.u = CountOneBits(S0.u) + S1.u. Bit count.
    void
    Inst_VOP3__V_BCNT_U32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Population count of S0 plus the per-lane addend S1.
                vdst[lane] = popCount(src0[lane]) + src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MBCNT_LO_U32_B32 class methods ---

    Inst_VOP3__V_MBCNT_LO_U32_B32::Inst_VOP3__V_MBCNT_LO_U32_B32(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_mbcnt_lo_u32_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MBCNT_LO_U32_B32

    Inst_VOP3__V_MBCNT_LO_U32_B32::~Inst_VOP3__V_MBCNT_LO_U32_B32()
    {
    } // ~Inst_VOP3__V_MBCNT_LO_U32_B32

    // --- description from .arch file ---
    // ThreadMask = (1 << ThreadPosition) - 1;
    // D.u = CountOneBits(S0.u & ThreadMask[31:0]) + S1.u.
    // Masked bit count, ThreadPosition is the position of this thread in the
    // --- wavefront (in 0..63).
    void
    Inst_VOP3__V_MBCNT_LO_U32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        uint64_t threadMask = 0;

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Mask selecting all lanes strictly below this one; the LO
                // variant counts only mask bits 31:0.
                threadMask = ((1ULL << lane) - 1ULL);
                vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) +
                    src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MBCNT_HI_U32_B32 class methods ---

    Inst_VOP3__V_MBCNT_HI_U32_B32::Inst_VOP3__V_MBCNT_HI_U32_B32(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_mbcnt_hi_u32_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MBCNT_HI_U32_B32

    Inst_VOP3__V_MBCNT_HI_U32_B32::~Inst_VOP3__V_MBCNT_HI_U32_B32()
    {
    } // ~Inst_VOP3__V_MBCNT_HI_U32_B32

    // --- description from .arch file ---
    // ThreadMask = (1 << ThreadPosition) - 1;
    // D.u = CountOneBits(S0.u & ThreadMask[63:32]) + S1.u.
    // Masked bit count, ThreadPosition is the position of this thread in the
    // --- wavefront (in 0..63).
    void
    Inst_VOP3__V_MBCNT_HI_U32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        uint64_t threadMask = 0;

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // Mask of all lanes strictly below this one; the HI variant
                // counts only mask bits 63:32, so lanes 0-31 contribute
                // nothing beyond src1.
                threadMask = ((1ULL << lane) - 1ULL);
                vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) +
                    src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_LSHLREV_B64 class methods ---

    Inst_VOP3__V_LSHLREV_B64::Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_lshlrev_b64", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHLREV_B64

    Inst_VOP3__V_LSHLREV_B64::~Inst_VOP3__V_LSHLREV_B64()
    {
    } // ~Inst_VOP3__V_LSHLREV_B64

    // --- description from .arch file ---
    // D.u64 = S1.u64 << S0.u[5:0].
    // SQ translates this to an internal SP opcode.
+ void + Inst_VOP3__V_LSHLREV_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] << bits(src0[lane], 5, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_LSHRREV_B64 class methods --- + + Inst_VOP3__V_LSHRREV_B64::Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_lshrrev_b64", false) + { + setFlag(ALU); + } // Inst_VOP3__V_LSHRREV_B64 + + Inst_VOP3__V_LSHRREV_B64::~Inst_VOP3__V_LSHRREV_B64() + { + } // ~Inst_VOP3__V_LSHRREV_B64 + + // --- description from .arch file --- + // D.u64 = S1.u64 >> S0.u[5:0]. + // The vacated bits are set to zero. + // SQ translates this to an internal SP opcode. 
+ void + Inst_VOP3__V_LSHRREV_B64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src1[lane] >> bits(src0[lane], 5, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_ASHRREV_I64 class methods --- + + Inst_VOP3__V_ASHRREV_I64::Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_ashrrev_i64", false) + { + setFlag(ALU); + } // Inst_VOP3__V_ASHRREV_I64 + + Inst_VOP3__V_ASHRREV_I64::~Inst_VOP3__V_ASHRREV_I64() + { + } // ~Inst_VOP3__V_ASHRREV_I64 + + // --- description from .arch file --- + // D.u64 = signext(S1.u64) >> S0.u[5:0]. + // The vacated bits are set to the sign bit of the input value. + // SQ translates this to an internal SP opcode. 
    void
    Inst_VOP3__V_ASHRREV_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        // Signed source type so the shift below sign-extends.
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // NOTE(review): right shift of a negative signed value is
                // implementation-defined pre-C++20; this assumes the host
                // compiler implements it as an arithmetic shift — confirm.
                vdst[lane]
                    = src1[lane] >> bits(src0[lane], 5, 0);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_TRIG_PREOP_F64 class methods ---

    Inst_VOP3__V_TRIG_PREOP_F64::Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_trig_preop_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_TRIG_PREOP_F64

    Inst_VOP3__V_TRIG_PREOP_F64::~Inst_VOP3__V_TRIG_PREOP_F64()
    {
    } // ~Inst_VOP3__V_TRIG_PREOP_F64

    // --- description from .arch file ---
    // D.d = Look Up 2/PI (S0.d) with segment select S1.u[4:0]. This operation
    // returns an aligned, double precision segment of 2/PI needed to do range
    // reduction on S0.d (double-precision value). Multiple segments can be
    // specified through S1.u[4:0]. Rounding is always round-to-zero. Large
    // inputs (exp > 1968) are scaled to avoid loss of precision through
    // denormalization.
+ void + Inst_VOP3__V_TRIG_PREOP_F64::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_BFM_B32 class methods --- + + Inst_VOP3__V_BFM_B32::Inst_VOP3__V_BFM_B32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_bfm_b32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_BFM_B32 + + Inst_VOP3__V_BFM_B32::~Inst_VOP3__V_BFM_B32() + { + } // ~Inst_VOP3__V_BFM_B32 + + // --- description from .arch file --- + // D.u = ((1<wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = ((1 << bits(src0[lane], 4, 0)) - 1) + << bits(src1[lane], 4, 0); + } + } + + vdst.write(); + } // execute + // --- Inst_VOP3__V_CVT_PKNORM_I16_F32 class methods --- + + Inst_VOP3__V_CVT_PKNORM_I16_F32::Inst_VOP3__V_CVT_PKNORM_I16_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_pknorm_i16_f32", false) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CVT_PKNORM_I16_F32 + + Inst_VOP3__V_CVT_PKNORM_I16_F32::~Inst_VOP3__V_CVT_PKNORM_I16_F32() + { + } // ~Inst_VOP3__V_CVT_PKNORM_I16_F32 + + // --- description from .arch file --- + // D = {(snorm)S1.f, (snorm)S0.f}. 
    // Not implemented in this model; decoding this opcode is fatal.
    void
    Inst_VOP3__V_CVT_PKNORM_I16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CVT_PKNORM_U16_F32 class methods ---

    Inst_VOP3__V_CVT_PKNORM_U16_F32::Inst_VOP3__V_CVT_PKNORM_U16_F32(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_pknorm_u16_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKNORM_U16_F32

    Inst_VOP3__V_CVT_PKNORM_U16_F32::~Inst_VOP3__V_CVT_PKNORM_U16_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKNORM_U16_F32

    // --- description from .arch file ---
    // D = {(unorm)S1.f, (unorm)S0.f}.
    // Not implemented in this model; decoding this opcode is fatal.
    void
    Inst_VOP3__V_CVT_PKNORM_U16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CVT_PKRTZ_F16_F32 class methods ---

    Inst_VOP3__V_CVT_PKRTZ_F16_F32::Inst_VOP3__V_CVT_PKRTZ_F16_F32(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_pkrtz_f16_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKRTZ_F16_F32

    Inst_VOP3__V_CVT_PKRTZ_F16_F32::~Inst_VOP3__V_CVT_PKRTZ_F16_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKRTZ_F16_F32

    // --- description from .arch file ---
    // D = {flt32_to_flt16(S1.f),flt32_to_flt16(S0.f)}, with round-toward-zero
    // --- regardless of current round mode setting in hardware.
    // This opcode is intended for use with 16-bit compressed exports.
    // See V_CVT_F16_F32 for a version that respects the current rounding mode.
    // Not implemented in this model; decoding this opcode is fatal.
    void
    Inst_VOP3__V_CVT_PKRTZ_F16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CVT_PK_U16_U32 class methods ---

    Inst_VOP3__V_CVT_PK_U16_U32::Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cvt_pk_u16_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CVT_PK_U16_U32

    Inst_VOP3__V_CVT_PK_U16_U32::~Inst_VOP3__V_CVT_PK_U16_U32()
    {
    } // ~Inst_VOP3__V_CVT_PK_U16_U32

    // --- description from .arch file ---
    // D = {uint32_to_uint16(S1.u), uint32_to_uint16(S0.u)}.
+ void + Inst_VOP3__V_CVT_PK_U16_U32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_PK_I16_I32 class methods --- + + Inst_VOP3__V_CVT_PK_I16_I32::Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_pk_i16_i32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_CVT_PK_I16_I32 + + Inst_VOP3__V_CVT_PK_I16_I32::~Inst_VOP3__V_CVT_PK_I16_I32() + { + } // ~Inst_VOP3__V_CVT_PK_I16_I32 + + // --- description from .arch file --- + // D = {int32_to_int16(S1.i), int32_to_int16(S0.i)}. + void + Inst_VOP3__V_CVT_PK_I16_I32::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CVT_PK_FP8_F32 class methods --- + + Inst_VOP3__V_CVT_PK_FP8_F32::Inst_VOP3__V_CVT_PK_FP8_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cvt_pk_fp8_f32", false) + { + setFlag(ALU); + } // Inst_VOP3__V_CVT_PK_FP8_F32 + + Inst_VOP3__V_CVT_PK_FP8_F32::~Inst_VOP3__V_CVT_PK_FP8_F32() + { + } // ~Inst_VOP3__V_CVT_PK_FP8_F32 + + void + Inst_VOP3__V_CVT_PK_FP8_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + vdst.read(); // Preserve bits + + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + panic_if(instData.CLAMP, "CLAMP not supported for %s", _opcode); + panic_if(extData.OMOD, "OMOD not supported for %s", _opcode); + + unsigned opsel = instData.OPSEL; + unsigned abs = instData.ABS; + unsigned neg = extData.NEG; + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + AMDGPU::mxfloat8 tmp0(src0[lane]), tmp1(src1[lane]); + + if ((abs & 1) && (tmp0 < 0)) tmp0 = -tmp0; + if ((abs & 2) && (tmp1 < 0)) tmp1 = -tmp1; + if (neg & 1) tmp0 = -tmp0; + if (neg & 2) tmp1 
= -tmp1; + + uint16_t packed_data = (bits(tmp0.data, 31, 24) << 8) + | bits(tmp1.data, 31, 24); + + if (opsel & 8) { + replaceBits(vdst[lane], 31, 16, packed_data); + } else { + replaceBits(vdst[lane], 15, 0, packed_data); + } + } + } + + vdst.write(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/vop3_cmp.cc b/src/arch/amdgpu/vega/insts/vop3_cmp.cc new file mode 100644 index 0000000000..4bbec930e6 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/vop3_cmp.cc @@ -0,0 +1,8145 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/inst_util.hh" +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_VOP3__V_CMP_CLASS_F32 class methods --- + + Inst_VOP3__V_CMP_CLASS_F32::Inst_VOP3__V_CMP_CLASS_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_class_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_CLASS_F32 + + Inst_VOP3__V_CMP_CLASS_F32::~Inst_VOP3__V_CMP_CLASS_F32() + { + } // ~Inst_VOP3__V_CMP_CLASS_F32 + + // --- description from .arch file --- + // VCC = IEEE numeric class function specified in S1.u, performed on S0.f + // The function reports true if the floating point value is *any* of the + // --- numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. 
+ void + Inst_VOP3__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (bits(src1[lane], 0) || bits(src1[lane], 1)) { + // is NaN + if (std::isnan(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 2)) { + // is -infinity + if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 3)) { + // is -normal + if (std::isnormal(src0[lane]) + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 4)) { + // is -denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 5)) { + // is -zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 6)) { + // is +zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 7)) { + // is +denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 8)) { + // is +normal + if (std::isnormal(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 9)) { + // is +infinity + if (std::isinf(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_CLASS_F32 class methods --- + + Inst_VOP3__V_CMPX_CLASS_F32::Inst_VOP3__V_CMPX_CLASS_F32( + 
InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_class_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_CLASS_F32 + + Inst_VOP3__V_CMPX_CLASS_F32::~Inst_VOP3__V_CMPX_CLASS_F32() + { + } // ~Inst_VOP3__V_CMPX_CLASS_F32 + + // --- description from .arch file --- + // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on + // S0.f + // The function reports true if the floating point value is *any* of the + // numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. + void + Inst_VOP3__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (bits(src1[lane], 0) || bits(src1[lane], 1)) { + // is NaN + if (std::isnan(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 2)) { + // is -infinity + if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 3)) { + // is -normal + if (std::isnormal(src0[lane]) + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 4)) { + // is -denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + 
continue; + } + } + if (bits(src1[lane], 5)) { + // is -zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 6)) { + // is +zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 7)) { + // is +denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 8)) { + // is +normal + if (std::isnormal(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 9)) { + // is +infinity + if (std::isinf(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_CLASS_F64 class methods --- + + Inst_VOP3__V_CMP_CLASS_F64::Inst_VOP3__V_CMP_CLASS_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_class_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_CLASS_F64 + + Inst_VOP3__V_CMP_CLASS_F64::~Inst_VOP3__V_CMP_CLASS_F64() + { + } // ~Inst_VOP3__V_CMP_CLASS_F64 + + // --- description from .arch file --- + // VCC = IEEE numeric class function specified in S1.u, performed on S0.d + // The function reports true if the floating point value is *any* of the + // --- numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. 
+ void + Inst_VOP3__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (bits(src1[lane], 0) || bits(src1[lane], 1)) { + // is NaN + if (std::isnan(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 2)) { + // is -infinity + if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 3)) { + // is -normal + if (std::isnormal(src0[lane]) + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 4)) { + // is -denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 5)) { + // is -zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 6)) { + // is +zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 7)) { + // is +denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 8)) { + // is +normal + if (std::isnormal(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 9)) { + // is +infinity + if (std::isinf(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_CLASS_F64 class methods --- + + Inst_VOP3__V_CMPX_CLASS_F64::Inst_VOP3__V_CMPX_CLASS_F64( + 
InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_class_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_CLASS_F64 + + Inst_VOP3__V_CMPX_CLASS_F64::~Inst_VOP3__V_CMPX_CLASS_F64() + { + } // ~Inst_VOP3__V_CMPX_CLASS_F64 + + // --- description from .arch file --- + // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on + // S0.d + // The function reports true if the floating point value is *any* of the + // numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. + void + Inst_VOP3__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (bits(src1[lane], 0) || bits(src1[lane], 1)) { + // is NaN + if (std::isnan(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 2)) { + // is -infinity + if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 3)) { + // is -normal + if (std::isnormal(src0[lane]) + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 4)) { + // is -denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + 
continue; + } + } + if (bits(src1[lane], 5)) { + // is -zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 6)) { + // is +zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 7)) { + // is +denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 8)) { + // is +normal + if (std::isnormal(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 9)) { + // is +infinity + if (std::isinf(src0[lane]) + && !std::signbit(src0[lane])) { + sdst.setBit(lane, 1); + continue; + } + } + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_CLASS_F16 class methods --- + + Inst_VOP3__V_CMP_CLASS_F16::Inst_VOP3__V_CMP_CLASS_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_class_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_CLASS_F16 + + Inst_VOP3__V_CMP_CLASS_F16::~Inst_VOP3__V_CMP_CLASS_F16() + { + } // ~Inst_VOP3__V_CMP_CLASS_F16 + + // --- description from .arch file --- + // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16 + // The function reports true if the floating point value is *any* of the + // --- numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. 
    // F16 class compares are not implemented in this model; decoding this
    // opcode is fatal.
    void
    Inst_VOP3__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CMPX_CLASS_F16 class methods ---

    Inst_VOP3__V_CMPX_CLASS_F16::Inst_VOP3__V_CMPX_CLASS_F16(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cmpx_class_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(WritesEXEC);
    } // Inst_VOP3__V_CMPX_CLASS_F16

    Inst_VOP3__V_CMPX_CLASS_F16::~Inst_VOP3__V_CMPX_CLASS_F16()
    {
    } // ~Inst_VOP3__V_CMPX_CLASS_F16

    // --- description from .arch file ---
    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // --- S0.f16
    // The function reports true if the floating point value is *any* of the
    // --- numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    // Not implemented in this model; decoding this opcode is fatal.
    void
    Inst_VOP3__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CMP_F_F16 class methods ---

    Inst_VOP3__V_CMP_F_F16::Inst_VOP3__V_CMP_F_F16(InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cmp_f_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_F_F16

    Inst_VOP3__V_CMP_F_F16::~Inst_VOP3__V_CMP_F_F16()
    {
    } // ~Inst_VOP3__V_CMP_F_F16

    // --- description from .arch file ---
    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    // F16 compares are not implemented in this model; decoding this opcode
    // is fatal.
    void
    Inst_VOP3__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CMP_LT_F16 class methods ---

    Inst_VOP3__V_CMP_LT_F16::Inst_VOP3__V_CMP_LT_F16(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cmp_lt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_LT_F16

    Inst_VOP3__V_CMP_LT_F16::~Inst_VOP3__V_CMP_LT_F16()
    {
    } // ~Inst_VOP3__V_CMP_LT_F16

    // --- description from .arch file ---
    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    // Not implemented in this model; decoding this opcode is fatal.
    void
    Inst_VOP3__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CMP_EQ_F16 class methods ---

    Inst_VOP3__V_CMP_EQ_F16::Inst_VOP3__V_CMP_EQ_F16(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cmp_eq_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_EQ_F16

    Inst_VOP3__V_CMP_EQ_F16::~Inst_VOP3__V_CMP_EQ_F16()
    {
    } // ~Inst_VOP3__V_CMP_EQ_F16

    // --- description from .arch file ---
    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    // Not implemented in this model; decoding this opcode is fatal.
    void
    Inst_VOP3__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CMP_LE_F16 class methods ---

    Inst_VOP3__V_CMP_LE_F16::Inst_VOP3__V_CMP_LE_F16(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cmp_le_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_LE_F16

    Inst_VOP3__V_CMP_LE_F16::~Inst_VOP3__V_CMP_LE_F16()
    {
    } // ~Inst_VOP3__V_CMP_LE_F16

    // --- description from .arch file ---
    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    // F16 compares are not implemented in this model; decoding this opcode
    // is fatal.
    void
    Inst_VOP3__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CMP_GT_F16 class methods ---

    Inst_VOP3__V_CMP_GT_F16::Inst_VOP3__V_CMP_GT_F16(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cmp_gt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_GT_F16

    Inst_VOP3__V_CMP_GT_F16::~Inst_VOP3__V_CMP_GT_F16()
    {
    } // ~Inst_VOP3__V_CMP_GT_F16

    // --- description from .arch file ---
    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    // Not implemented in this model; decoding this opcode is fatal.
    void
    Inst_VOP3__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CMP_LG_F16 class methods ---

    Inst_VOP3__V_CMP_LG_F16::Inst_VOP3__V_CMP_LG_F16(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cmp_lg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_LG_F16

    Inst_VOP3__V_CMP_LG_F16::~Inst_VOP3__V_CMP_LG_F16()
    {
    } // ~Inst_VOP3__V_CMP_LG_F16

    // --- description from .arch file ---
    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    // Not implemented in this model; decoding this opcode is fatal.
    void
    Inst_VOP3__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOP3__V_CMP_GE_F16 class methods ---

    Inst_VOP3__V_CMP_GE_F16::Inst_VOP3__V_CMP_GE_F16(
          InFmt_VOP3A *iFmt)
        : Inst_VOP3A(iFmt, "v_cmp_ge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_GE_F16

    Inst_VOP3__V_CMP_GE_F16::~Inst_VOP3__V_CMP_GE_F16()
    {
    } // ~Inst_VOP3__V_CMP_GE_F16

    // --- description from .arch file ---
    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
+ void + Inst_VOP3__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_O_F16 class methods --- + + Inst_VOP3__V_CMP_O_F16::Inst_VOP3__V_CMP_O_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_o_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_O_F16 + + Inst_VOP3__V_CMP_O_F16::~Inst_VOP3__V_CMP_O_F16() + { + } // ~Inst_VOP3__V_CMP_O_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_U_F16 class methods --- + + Inst_VOP3__V_CMP_U_F16::Inst_VOP3__V_CMP_U_F16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_u_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_U_F16 + + Inst_VOP3__V_CMP_U_F16::~Inst_VOP3__V_CMP_U_F16() + { + } // ~Inst_VOP3__V_CMP_U_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_NGE_F16 class methods --- + + Inst_VOP3__V_CMP_NGE_F16::Inst_VOP3__V_CMP_NGE_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nge_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_NGE_F16 + + Inst_VOP3__V_CMP_NGE_F16::~Inst_VOP3__V_CMP_NGE_F16() + { + } // ~Inst_VOP3__V_CMP_NGE_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_NLG_F16 class methods --- + + Inst_VOP3__V_CMP_NLG_F16::Inst_VOP3__V_CMP_NLG_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nlg_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_NLG_F16 + + Inst_VOP3__V_CMP_NLG_F16::~Inst_VOP3__V_CMP_NLG_F16() + { + } // ~Inst_VOP3__V_CMP_NLG_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_NGT_F16 class methods --- + + Inst_VOP3__V_CMP_NGT_F16::Inst_VOP3__V_CMP_NGT_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ngt_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_NGT_F16 + + Inst_VOP3__V_CMP_NGT_F16::~Inst_VOP3__V_CMP_NGT_F16() + { + } // ~Inst_VOP3__V_CMP_NGT_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_NLE_F16 class methods --- + + Inst_VOP3__V_CMP_NLE_F16::Inst_VOP3__V_CMP_NLE_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nle_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_NLE_F16 + + Inst_VOP3__V_CMP_NLE_F16::~Inst_VOP3__V_CMP_NLE_F16() + { + } // ~Inst_VOP3__V_CMP_NLE_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_NEQ_F16 class methods --- + + Inst_VOP3__V_CMP_NEQ_F16::Inst_VOP3__V_CMP_NEQ_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_neq_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_NEQ_F16 + + Inst_VOP3__V_CMP_NEQ_F16::~Inst_VOP3__V_CMP_NEQ_F16() + { + } // ~Inst_VOP3__V_CMP_NEQ_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_NLT_F16 class methods --- + + Inst_VOP3__V_CMP_NLT_F16::Inst_VOP3__V_CMP_NLT_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nlt_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_NLT_F16 + + Inst_VOP3__V_CMP_NLT_F16::~Inst_VOP3__V_CMP_NLT_F16() + { + } // ~Inst_VOP3__V_CMP_NLT_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMP_TRU_F16 class methods --- + + Inst_VOP3__V_CMP_TRU_F16::Inst_VOP3__V_CMP_TRU_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_tru_f16", true) + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOP3__V_CMP_TRU_F16 + + Inst_VOP3__V_CMP_TRU_F16::~Inst_VOP3__V_CMP_TRU_F16() + { + } // ~Inst_VOP3__V_CMP_TRU_F16 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+    void
+    Inst_VOP3__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        // Unconditionally-true compare: set the result bit for every
+        // active lane; inactive lanes are left untouched.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, 1);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_F_F16 class methods ---
+
+    Inst_VOP3__V_CMPX_F_F16::Inst_VOP3__V_CMPX_F_F16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_f_f16", true)
+    {
+        setFlag(ALU);
+        // Fix: every other *_f16 compare in this family (v_cmpx_lt_f16,
+        // v_cmpx_tru_f16, ...) sets F16; it was missing here, so this
+        // instruction's operand type was misreported.
+        setFlag(F16);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_F_F16
+
+    Inst_VOP3__V_CMPX_F_F16::~Inst_VOP3__V_CMPX_F_F16()
+    {
+    } // ~Inst_VOP3__V_CMPX_F_F16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        // Always-false compare: clear the result bit for each active
+        // lane, then fold the result back into EXEC (CMPX semantics).
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, 0);
+            }
+        }
+
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_LT_F16 class methods ---
+
+    Inst_VOP3__V_CMPX_LT_F16::Inst_VOP3__V_CMPX_LT_F16(
+        InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_lt_f16", true)
+    {
+        setFlag(ALU);
+        setFlag(F16);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_LT_F16
+
+    Inst_VOP3__V_CMPX_LT_F16::~Inst_VOP3__V_CMPX_LT_F16()
+    {
+    } // ~Inst_VOP3__V_CMPX_LT_F16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
+ void + Inst_VOP3__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_EQ_F16 class methods --- + + Inst_VOP3__V_CMPX_EQ_F16::Inst_VOP3__V_CMPX_EQ_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_eq_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_EQ_F16 + + Inst_VOP3__V_CMPX_EQ_F16::~Inst_VOP3__V_CMPX_EQ_F16() + { + } // ~Inst_VOP3__V_CMPX_EQ_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_LE_F16 class methods --- + + Inst_VOP3__V_CMPX_LE_F16::Inst_VOP3__V_CMPX_LE_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_le_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LE_F16 + + Inst_VOP3__V_CMPX_LE_F16::~Inst_VOP3__V_CMPX_LE_F16() + { + } // ~Inst_VOP3__V_CMPX_LE_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_GT_F16 class methods --- + + Inst_VOP3__V_CMPX_GT_F16::Inst_VOP3__V_CMPX_GT_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_gt_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GT_F16 + + Inst_VOP3__V_CMPX_GT_F16::~Inst_VOP3__V_CMPX_GT_F16() + { + } // ~Inst_VOP3__V_CMPX_GT_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_LG_F16 class methods --- + + Inst_VOP3__V_CMPX_LG_F16::Inst_VOP3__V_CMPX_LG_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lg_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LG_F16 + + Inst_VOP3__V_CMPX_LG_F16::~Inst_VOP3__V_CMPX_LG_F16() + { + } // ~Inst_VOP3__V_CMPX_LG_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_GE_F16 class methods --- + + Inst_VOP3__V_CMPX_GE_F16::Inst_VOP3__V_CMPX_GE_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ge_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GE_F16 + + Inst_VOP3__V_CMPX_GE_F16::~Inst_VOP3__V_CMPX_GE_F16() + { + } // ~Inst_VOP3__V_CMPX_GE_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_O_F16 class methods --- + + Inst_VOP3__V_CMPX_O_F16::Inst_VOP3__V_CMPX_O_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_o_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_O_F16 + + Inst_VOP3__V_CMPX_O_F16::~Inst_VOP3__V_CMPX_O_F16() + { + } // ~Inst_VOP3__V_CMPX_O_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC + // encoding. 
+ void + Inst_VOP3__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_U_F16 class methods --- + + Inst_VOP3__V_CMPX_U_F16::Inst_VOP3__V_CMPX_U_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_u_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_U_F16 + + Inst_VOP3__V_CMPX_U_F16::~Inst_VOP3__V_CMPX_U_F16() + { + } // ~Inst_VOP3__V_CMPX_U_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC + // encoding. + void + Inst_VOP3__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_NGE_F16 class methods --- + + Inst_VOP3__V_CMPX_NGE_F16::Inst_VOP3__V_CMPX_NGE_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nge_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NGE_F16 + + Inst_VOP3__V_CMPX_NGE_F16::~Inst_VOP3__V_CMPX_NGE_F16() + { + } // ~Inst_VOP3__V_CMPX_NGE_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_NLG_F16 class methods --- + + Inst_VOP3__V_CMPX_NLG_F16::Inst_VOP3__V_CMPX_NLG_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nlg_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLG_F16 + + Inst_VOP3__V_CMPX_NLG_F16::~Inst_VOP3__V_CMPX_NLG_F16() + { + } // ~Inst_VOP3__V_CMPX_NLG_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_NGT_F16 class methods --- + + Inst_VOP3__V_CMPX_NGT_F16::Inst_VOP3__V_CMPX_NGT_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ngt_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NGT_F16 + + Inst_VOP3__V_CMPX_NGT_F16::~Inst_VOP3__V_CMPX_NGT_F16() + { + } // ~Inst_VOP3__V_CMPX_NGT_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_NLE_F16 class methods --- + + Inst_VOP3__V_CMPX_NLE_F16::Inst_VOP3__V_CMPX_NLE_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nle_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLE_F16 + + Inst_VOP3__V_CMPX_NLE_F16::~Inst_VOP3__V_CMPX_NLE_F16() + { + } // ~Inst_VOP3__V_CMPX_NLE_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_NEQ_F16 class methods --- + + Inst_VOP3__V_CMPX_NEQ_F16::Inst_VOP3__V_CMPX_NEQ_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_neq_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NEQ_F16 + + Inst_VOP3__V_CMPX_NEQ_F16::~Inst_VOP3__V_CMPX_NEQ_F16() + { + } // ~Inst_VOP3__V_CMPX_NEQ_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_NLT_F16 class methods --- + + Inst_VOP3__V_CMPX_NLT_F16::Inst_VOP3__V_CMPX_NLT_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nlt_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLT_F16 + + Inst_VOP3__V_CMPX_NLT_F16::~Inst_VOP3__V_CMPX_NLT_F16() + { + } // ~Inst_VOP3__V_CMPX_NLT_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOP3__V_CMPX_TRU_F16 class methods --- + + Inst_VOP3__V_CMPX_TRU_F16::Inst_VOP3__V_CMPX_TRU_F16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_tru_f16", true) + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_TRU_F16 + + Inst_VOP3__V_CMPX_TRU_F16::~Inst_VOP3__V_CMPX_TRU_F16() + { + } // ~Inst_VOP3__V_CMPX_TRU_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_F_F32 class methods --- + + Inst_VOP3__V_CMP_F_F32::Inst_VOP3__V_CMP_F_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_f_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_F_F32 + + Inst_VOP3__V_CMP_F_F32::~Inst_VOP3__V_CMP_F_F32() + { + } // ~Inst_VOP3__V_CMP_F_F32 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LT_F32 class methods --- + + Inst_VOP3__V_CMP_LT_F32::Inst_VOP3__V_CMP_LT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lt_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_LT_F32 + + Inst_VOP3__V_CMP_LT_F32::~Inst_VOP3__V_CMP_LT_F32() + { + } // ~Inst_VOP3__V_CMP_LT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_EQ_F32 class methods --- + + Inst_VOP3__V_CMP_EQ_F32::Inst_VOP3__V_CMP_EQ_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_eq_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_EQ_F32 + + Inst_VOP3__V_CMP_EQ_F32::~Inst_VOP3__V_CMP_EQ_F32() + { + } // ~Inst_VOP3__V_CMP_EQ_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LE_F32 class methods --- + + Inst_VOP3__V_CMP_LE_F32::Inst_VOP3__V_CMP_LE_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_le_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_LE_F32 + + Inst_VOP3__V_CMP_LE_F32::~Inst_VOP3__V_CMP_LE_F32() + { + } // ~Inst_VOP3__V_CMP_LE_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GT_F32 class methods --- + + Inst_VOP3__V_CMP_GT_F32::Inst_VOP3__V_CMP_GT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_gt_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_GT_F32 + + Inst_VOP3__V_CMP_GT_F32::~Inst_VOP3__V_CMP_GT_F32() + { + } // ~Inst_VOP3__V_CMP_GT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+    void
+    Inst_VOP3__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        // Ordered greater-than; NaN operands yield 0 because C++
+        // operator> is false for unordered operands.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_LG_F32 class methods ---
+
+    Inst_VOP3__V_CMP_LG_F32::Inst_VOP3__V_CMP_LG_F32(
+        InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_lg_f32", true)
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP3__V_CMP_LG_F32
+
+    Inst_VOP3__V_CMP_LG_F32::~Inst_VOP3__V_CMP_LG_F32()
+    {
+    } // ~Inst_VOP3__V_CMP_LG_F32
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                // Fix: LG is the *ordered* "less than or greater than"
+                // compare, so it must be false when either source is NaN.
+                // Plain != is true for NaN operands — that is the
+                // semantics of v_cmp_neq_f32, implemented with != later
+                // in this file. Spell out the ordered form, mirroring the
+                // complementary v_cmp_nlg_f32's !(S0 < S1 || S0 > S1).
+                sdst.setBit(lane, (src0[lane] < src1[lane]
+                    || src0[lane] > src1[lane]) ? 1 : 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_GE_F32 class methods ---
+
+    Inst_VOP3__V_CMP_GE_F32::Inst_VOP3__V_CMP_GE_F32(
+        InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_ge_f32", true)
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP3__V_CMP_GE_F32
+
+    Inst_VOP3__V_CMP_GE_F32::~Inst_VOP3__V_CMP_GE_F32()
+    {
+    } // ~Inst_VOP3__V_CMP_GE_F32
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
+ void + Inst_VOP3__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_O_F32 class methods --- + + Inst_VOP3__V_CMP_O_F32::Inst_VOP3__V_CMP_O_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_o_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_O_F32 + + Inst_VOP3__V_CMP_O_F32::~Inst_VOP3__V_CMP_O_F32() + { + } // ~Inst_VOP3__V_CMP_O_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_U_F32 class methods --- + + Inst_VOP3__V_CMP_U_F32::Inst_VOP3__V_CMP_U_F32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_u_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_U_F32 + + Inst_VOP3__V_CMP_U_F32::~Inst_VOP3__V_CMP_U_F32() + { + } // ~Inst_VOP3__V_CMP_U_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NGE_F32 class methods --- + + Inst_VOP3__V_CMP_NGE_F32::Inst_VOP3__V_CMP_NGE_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nge_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_NGE_F32 + + Inst_VOP3__V_CMP_NGE_F32::~Inst_VOP3__V_CMP_NGE_F32() + { + } // ~Inst_VOP3__V_CMP_NGE_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NLG_F32 class methods --- + + Inst_VOP3__V_CMP_NLG_F32::Inst_VOP3__V_CMP_NLG_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nlg_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_NLG_F32 + + Inst_VOP3__V_CMP_NLG_F32::~Inst_VOP3__V_CMP_NLG_F32() + { + } // ~Inst_VOP3__V_CMP_NLG_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NGT_F32 class methods --- + + Inst_VOP3__V_CMP_NGT_F32::Inst_VOP3__V_CMP_NGT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ngt_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_NGT_F32 + + Inst_VOP3__V_CMP_NGT_F32::~Inst_VOP3__V_CMP_NGT_F32() + { + } // ~Inst_VOP3__V_CMP_NGT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NLE_F32 class methods --- + + Inst_VOP3__V_CMP_NLE_F32::Inst_VOP3__V_CMP_NLE_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nle_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_NLE_F32 + + Inst_VOP3__V_CMP_NLE_F32::~Inst_VOP3__V_CMP_NLE_F32() + { + } // ~Inst_VOP3__V_CMP_NLE_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NEQ_F32 class methods --- + + Inst_VOP3__V_CMP_NEQ_F32::Inst_VOP3__V_CMP_NEQ_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_neq_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_NEQ_F32 + + Inst_VOP3__V_CMP_NEQ_F32::~Inst_VOP3__V_CMP_NEQ_F32() + { + } // ~Inst_VOP3__V_CMP_NEQ_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NLT_F32 class methods --- + + Inst_VOP3__V_CMP_NLT_F32::Inst_VOP3__V_CMP_NLT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nlt_f32", true) + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOP3__V_CMP_NLT_F32 + + Inst_VOP3__V_CMP_NLT_F32::~Inst_VOP3__V_CMP_NLT_F32() + { + } // ~Inst_VOP3__V_CMP_NLT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. 
+    void
+    Inst_VOP3__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wavefront = gpuDynInst->wavefront();
+        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        // Unordered not-less-than: the bit is set whenever the ordered
+        // src0 < src1 test fails, which includes NaN operands.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (!wavefront->execMask(lane)) {
+                continue;
+            }
+            sdst.setBit(lane, (src0[lane] < src1[lane]) ? 0 : 1);
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_TRU_F32 class methods ---
+
+    Inst_VOP3__V_CMP_TRU_F32::Inst_VOP3__V_CMP_TRU_F32(
+        InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_tru_f32", true)
+    {
+        setFlag(ALU);
+        setFlag(F32);
+    } // Inst_VOP3__V_CMP_TRU_F32
+
+    Inst_VOP3__V_CMP_TRU_F32::~Inst_VOP3__V_CMP_TRU_F32()
+    {
+    } // ~Inst_VOP3__V_CMP_TRU_F32
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wavefront = gpuDynInst->wavefront();
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        // Compare that always succeeds: raise the result bit for each
+        // lane currently enabled in EXEC; disabled lanes are untouched.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (!wavefront->execMask(lane)) {
+                continue;
+            }
+            sdst.setBit(lane, 1);
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_F_F32 class methods ---
+
+    Inst_VOP3__V_CMPX_F_F32::Inst_VOP3__V_CMPX_F_F32(
+        InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_f_f32", true)
+    {
+        setFlag(ALU);
+        setFlag(F32);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_F_F32
+
+    Inst_VOP3__V_CMPX_F_F32::~Inst_VOP3__V_CMPX_F_F32()
+    {
+    } // ~Inst_VOP3__V_CMPX_F_F32
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
+ void + Inst_VOP3__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LT_F32 class methods --- + + Inst_VOP3__V_CMPX_LT_F32::Inst_VOP3__V_CMPX_LT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lt_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LT_F32 + + Inst_VOP3__V_CMPX_LT_F32::~Inst_VOP3__V_CMPX_LT_F32() + { + } // ~Inst_VOP3__V_CMPX_LT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_EQ_F32 class methods --- + + Inst_VOP3__V_CMPX_EQ_F32::Inst_VOP3__V_CMPX_EQ_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_eq_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_EQ_F32 + + Inst_VOP3__V_CMPX_EQ_F32::~Inst_VOP3__V_CMPX_EQ_F32() + { + } // ~Inst_VOP3__V_CMPX_EQ_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LE_F32 class methods --- + + Inst_VOP3__V_CMPX_LE_F32::Inst_VOP3__V_CMPX_LE_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_le_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LE_F32 + + Inst_VOP3__V_CMPX_LE_F32::~Inst_VOP3__V_CMPX_LE_F32() + { + } // ~Inst_VOP3__V_CMPX_LE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GT_F32 class methods --- + + Inst_VOP3__V_CMPX_GT_F32::Inst_VOP3__V_CMPX_GT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_gt_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GT_F32 + + Inst_VOP3__V_CMPX_GT_F32::~Inst_VOP3__V_CMPX_GT_F32() + { + } // ~Inst_VOP3__V_CMPX_GT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LG_F32 class methods --- + + Inst_VOP3__V_CMPX_LG_F32::Inst_VOP3__V_CMPX_LG_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lg_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LG_F32 + + Inst_VOP3__V_CMPX_LG_F32::~Inst_VOP3__V_CMPX_LG_F32() + { + } // ~Inst_VOP3__V_CMPX_LG_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GE_F32 class methods --- + + Inst_VOP3__V_CMPX_GE_F32::Inst_VOP3__V_CMPX_GE_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ge_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GE_F32 + + Inst_VOP3__V_CMPX_GE_F32::~Inst_VOP3__V_CMPX_GE_F32() + { + } // ~Inst_VOP3__V_CMPX_GE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_O_F32 class methods --- + + Inst_VOP3__V_CMPX_O_F32::Inst_VOP3__V_CMPX_O_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_o_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_O_F32 + + Inst_VOP3__V_CMPX_O_F32::~Inst_VOP3__V_CMPX_O_F32() + { + } // ~Inst_VOP3__V_CMPX_O_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC + // encoding. 
+ void + Inst_VOP3__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_U_F32 class methods --- + + Inst_VOP3__V_CMPX_U_F32::Inst_VOP3__V_CMPX_U_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_u_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_U_F32 + + Inst_VOP3__V_CMPX_U_F32::~Inst_VOP3__V_CMPX_U_F32() + { + } // ~Inst_VOP3__V_CMPX_U_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC + // encoding. + void + Inst_VOP3__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NGE_F32 class methods --- + + Inst_VOP3__V_CMPX_NGE_F32::Inst_VOP3__V_CMPX_NGE_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nge_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NGE_F32 + + Inst_VOP3__V_CMPX_NGE_F32::~Inst_VOP3__V_CMPX_NGE_F32() + { + } // ~Inst_VOP3__V_CMPX_NGE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NLG_F32 class methods --- + + Inst_VOP3__V_CMPX_NLG_F32::Inst_VOP3__V_CMPX_NLG_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nlg_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLG_F32 + + Inst_VOP3__V_CMPX_NLG_F32::~Inst_VOP3__V_CMPX_NLG_F32() + { + } // ~Inst_VOP3__V_CMPX_NLG_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NGT_F32 class methods --- + + Inst_VOP3__V_CMPX_NGT_F32::Inst_VOP3__V_CMPX_NGT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ngt_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NGT_F32 + + Inst_VOP3__V_CMPX_NGT_F32::~Inst_VOP3__V_CMPX_NGT_F32() + { + } // ~Inst_VOP3__V_CMPX_NGT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NLE_F32 class methods --- + + Inst_VOP3__V_CMPX_NLE_F32::Inst_VOP3__V_CMPX_NLE_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nle_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLE_F32 + + Inst_VOP3__V_CMPX_NLE_F32::~Inst_VOP3__V_CMPX_NLE_F32() + { + } // ~Inst_VOP3__V_CMPX_NLE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NEQ_F32 class methods --- + + Inst_VOP3__V_CMPX_NEQ_F32::Inst_VOP3__V_CMPX_NEQ_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_neq_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NEQ_F32 + + Inst_VOP3__V_CMPX_NEQ_F32::~Inst_VOP3__V_CMPX_NEQ_F32() + { + } // ~Inst_VOP3__V_CMPX_NEQ_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NLT_F32 class methods --- + + Inst_VOP3__V_CMPX_NLT_F32::Inst_VOP3__V_CMPX_NLT_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nlt_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLT_F32 + + Inst_VOP3__V_CMPX_NLT_F32::~Inst_VOP3__V_CMPX_NLT_F32() + { + } // ~Inst_VOP3__V_CMPX_NLT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_TRU_F32 class methods --- + + Inst_VOP3__V_CMPX_TRU_F32::Inst_VOP3__V_CMPX_TRU_F32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_tru_f32", true) + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_TRU_F32 + + Inst_VOP3__V_CMPX_TRU_F32::~Inst_VOP3__V_CMPX_TRU_F32() + { + } // ~Inst_VOP3__V_CMPX_TRU_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_F_F64 class methods --- + + Inst_VOP3__V_CMP_F_F64::Inst_VOP3__V_CMP_F_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_f_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_F_F64 + + Inst_VOP3__V_CMP_F_F64::~Inst_VOP3__V_CMP_F_F64() + { + } // ~Inst_VOP3__V_CMP_F_F64 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LT_F64 class methods --- + + Inst_VOP3__V_CMP_LT_F64::Inst_VOP3__V_CMP_LT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lt_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_LT_F64 + + Inst_VOP3__V_CMP_LT_F64::~Inst_VOP3__V_CMP_LT_F64() + { + } // ~Inst_VOP3__V_CMP_LT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_EQ_F64 class methods --- + + Inst_VOP3__V_CMP_EQ_F64::Inst_VOP3__V_CMP_EQ_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_eq_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_EQ_F64 + + Inst_VOP3__V_CMP_EQ_F64::~Inst_VOP3__V_CMP_EQ_F64() + { + } // ~Inst_VOP3__V_CMP_EQ_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LE_F64 class methods --- + + Inst_VOP3__V_CMP_LE_F64::Inst_VOP3__V_CMP_LE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_le_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_LE_F64 + + Inst_VOP3__V_CMP_LE_F64::~Inst_VOP3__V_CMP_LE_F64() + { + } // ~Inst_VOP3__V_CMP_LE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GT_F64 class methods --- + + Inst_VOP3__V_CMP_GT_F64::Inst_VOP3__V_CMP_GT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_gt_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_GT_F64 + + Inst_VOP3__V_CMP_GT_F64::~Inst_VOP3__V_CMP_GT_F64() + { + } // ~Inst_VOP3__V_CMP_GT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LG_F64 class methods --- + + Inst_VOP3__V_CMP_LG_F64::Inst_VOP3__V_CMP_LG_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lg_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_LG_F64 + + Inst_VOP3__V_CMP_LG_F64::~Inst_VOP3__V_CMP_LG_F64() + { + } // ~Inst_VOP3__V_CMP_LG_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GE_F64 class methods --- + + Inst_VOP3__V_CMP_GE_F64::Inst_VOP3__V_CMP_GE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ge_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_GE_F64 + + Inst_VOP3__V_CMP_GE_F64::~Inst_VOP3__V_CMP_GE_F64() + { + } // ~Inst_VOP3__V_CMP_GE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_O_F64 class methods --- + + Inst_VOP3__V_CMP_O_F64::Inst_VOP3__V_CMP_O_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_o_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_O_F64 + + Inst_VOP3__V_CMP_O_F64::~Inst_VOP3__V_CMP_O_F64() + { + } // ~Inst_VOP3__V_CMP_O_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_U_F64 class methods --- + + Inst_VOP3__V_CMP_U_F64::Inst_VOP3__V_CMP_U_F64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_u_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_U_F64 + + Inst_VOP3__V_CMP_U_F64::~Inst_VOP3__V_CMP_U_F64() + { + } // ~Inst_VOP3__V_CMP_U_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NGE_F64 class methods --- + + Inst_VOP3__V_CMP_NGE_F64::Inst_VOP3__V_CMP_NGE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nge_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_NGE_F64 + + Inst_VOP3__V_CMP_NGE_F64::~Inst_VOP3__V_CMP_NGE_F64() + { + } // ~Inst_VOP3__V_CMP_NGE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NLG_F64 class methods --- + + Inst_VOP3__V_CMP_NLG_F64::Inst_VOP3__V_CMP_NLG_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nlg_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_NLG_F64 + + Inst_VOP3__V_CMP_NLG_F64::~Inst_VOP3__V_CMP_NLG_F64() + { + } // ~Inst_VOP3__V_CMP_NLG_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NGT_F64 class methods --- + + Inst_VOP3__V_CMP_NGT_F64::Inst_VOP3__V_CMP_NGT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ngt_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_NGT_F64 + + Inst_VOP3__V_CMP_NGT_F64::~Inst_VOP3__V_CMP_NGT_F64() + { + } // ~Inst_VOP3__V_CMP_NGT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NLE_F64 class methods --- + + Inst_VOP3__V_CMP_NLE_F64::Inst_VOP3__V_CMP_NLE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nle_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_NLE_F64 + + Inst_VOP3__V_CMP_NLE_F64::~Inst_VOP3__V_CMP_NLE_F64() + { + } // ~Inst_VOP3__V_CMP_NLE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NEQ_F64 class methods --- + + Inst_VOP3__V_CMP_NEQ_F64::Inst_VOP3__V_CMP_NEQ_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_neq_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_NEQ_F64 + + Inst_VOP3__V_CMP_NEQ_F64::~Inst_VOP3__V_CMP_NEQ_F64() + { + } // ~Inst_VOP3__V_CMP_NEQ_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NLT_F64 class methods --- + + Inst_VOP3__V_CMP_NLT_F64::Inst_VOP3__V_CMP_NLT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_nlt_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_NLT_F64 + + Inst_VOP3__V_CMP_NLT_F64::~Inst_VOP3__V_CMP_NLT_F64() + { + } // ~Inst_VOP3__V_CMP_NLT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_TRU_F64 class methods --- + + Inst_VOP3__V_CMP_TRU_F64::Inst_VOP3__V_CMP_TRU_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_tru_f64", true) + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOP3__V_CMP_TRU_F64 + + Inst_VOP3__V_CMP_TRU_F64::~Inst_VOP3__V_CMP_TRU_F64() + { + } // ~Inst_VOP3__V_CMP_TRU_F64 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_F_F64 class methods --- + + Inst_VOP3__V_CMPX_F_F64::Inst_VOP3__V_CMPX_F_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_f_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_F_F64 + + Inst_VOP3__V_CMPX_F_F64::~Inst_VOP3__V_CMPX_F_F64() + { + } // ~Inst_VOP3__V_CMPX_F_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LT_F64 class methods --- + + Inst_VOP3__V_CMPX_LT_F64::Inst_VOP3__V_CMPX_LT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lt_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LT_F64 + + Inst_VOP3__V_CMPX_LT_F64::~Inst_VOP3__V_CMPX_LT_F64() + { + } // ~Inst_VOP3__V_CMPX_LT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_EQ_F64 class methods --- + + Inst_VOP3__V_CMPX_EQ_F64::Inst_VOP3__V_CMPX_EQ_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_eq_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_EQ_F64 + + Inst_VOP3__V_CMPX_EQ_F64::~Inst_VOP3__V_CMPX_EQ_F64() + { + } // ~Inst_VOP3__V_CMPX_EQ_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LE_F64 class methods --- + + Inst_VOP3__V_CMPX_LE_F64::Inst_VOP3__V_CMPX_LE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_le_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LE_F64 + + Inst_VOP3__V_CMPX_LE_F64::~Inst_VOP3__V_CMPX_LE_F64() + { + } // ~Inst_VOP3__V_CMPX_LE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GT_F64 class methods --- + + Inst_VOP3__V_CMPX_GT_F64::Inst_VOP3__V_CMPX_GT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_gt_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GT_F64 + + Inst_VOP3__V_CMPX_GT_F64::~Inst_VOP3__V_CMPX_GT_F64() + { + } // ~Inst_VOP3__V_CMPX_GT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LG_F64 class methods --- + + Inst_VOP3__V_CMPX_LG_F64::Inst_VOP3__V_CMPX_LG_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lg_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LG_F64 + + Inst_VOP3__V_CMPX_LG_F64::~Inst_VOP3__V_CMPX_LG_F64() + { + } // ~Inst_VOP3__V_CMPX_LG_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GE_F64 class methods --- + + Inst_VOP3__V_CMPX_GE_F64::Inst_VOP3__V_CMPX_GE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ge_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GE_F64 + + Inst_VOP3__V_CMPX_GE_F64::~Inst_VOP3__V_CMPX_GE_F64() + { + } // ~Inst_VOP3__V_CMPX_GE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_O_F64 class methods --- + + Inst_VOP3__V_CMPX_O_F64::Inst_VOP3__V_CMPX_O_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_o_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_O_F64 + + Inst_VOP3__V_CMPX_O_F64::~Inst_VOP3__V_CMPX_O_F64() + { + } // ~Inst_VOP3__V_CMPX_O_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC + // encoding. 
+ void + Inst_VOP3__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_U_F64 class methods --- + + Inst_VOP3__V_CMPX_U_F64::Inst_VOP3__V_CMPX_U_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_u_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_U_F64 + + Inst_VOP3__V_CMPX_U_F64::~Inst_VOP3__V_CMPX_U_F64() + { + } // ~Inst_VOP3__V_CMPX_U_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC + // encoding. 
+ void + Inst_VOP3__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NGE_F64 class methods --- + + Inst_VOP3__V_CMPX_NGE_F64::Inst_VOP3__V_CMPX_NGE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nge_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NGE_F64 + + Inst_VOP3__V_CMPX_NGE_F64::~Inst_VOP3__V_CMPX_NGE_F64() + { + } // ~Inst_VOP3__V_CMPX_NGE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NLG_F64 class methods --- + + Inst_VOP3__V_CMPX_NLG_F64::Inst_VOP3__V_CMPX_NLG_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nlg_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLG_F64 + + Inst_VOP3__V_CMPX_NLG_F64::~Inst_VOP3__V_CMPX_NLG_F64() + { + } // ~Inst_VOP3__V_CMPX_NLG_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NGT_F64 class methods --- + + Inst_VOP3__V_CMPX_NGT_F64::Inst_VOP3__V_CMPX_NGT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ngt_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NGT_F64 + + Inst_VOP3__V_CMPX_NGT_F64::~Inst_VOP3__V_CMPX_NGT_F64() + { + } // ~Inst_VOP3__V_CMPX_NGT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NLE_F64 class methods --- + + Inst_VOP3__V_CMPX_NLE_F64::Inst_VOP3__V_CMPX_NLE_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nle_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLE_F64 + + Inst_VOP3__V_CMPX_NLE_F64::~Inst_VOP3__V_CMPX_NLE_F64() + { + } // ~Inst_VOP3__V_CMPX_NLE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NEQ_F64 class methods --- + + Inst_VOP3__V_CMPX_NEQ_F64::Inst_VOP3__V_CMPX_NEQ_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_neq_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NEQ_F64 + + Inst_VOP3__V_CMPX_NEQ_F64::~Inst_VOP3__V_CMPX_NEQ_F64() + { + } // ~Inst_VOP3__V_CMPX_NEQ_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NLT_F64 class methods --- + + Inst_VOP3__V_CMPX_NLT_F64::Inst_VOP3__V_CMPX_NLT_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_nlt_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NLT_F64 + + Inst_VOP3__V_CMPX_NLT_F64::~Inst_VOP3__V_CMPX_NLT_F64() + { + } // ~Inst_VOP3__V_CMPX_NLT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + if (instData.ABS & 0x1) { + src0.absModifier(); + } + + if (instData.ABS & 0x2) { + src1.absModifier(); + } + + if (extData.NEG & 0x1) { + src0.negModifier(); + } + + if (extData.NEG & 0x2) { + src1.negModifier(); + } + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_TRU_F64 class methods --- + + Inst_VOP3__V_CMPX_TRU_F64::Inst_VOP3__V_CMPX_TRU_F64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_tru_f64", true) + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_TRU_F64 + + Inst_VOP3__V_CMPX_TRU_F64::~Inst_VOP3__V_CMPX_TRU_F64() + { + } // ~Inst_VOP3__V_CMPX_TRU_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_F_I16 class methods --- + + Inst_VOP3__V_CMP_F_I16::Inst_VOP3__V_CMP_F_I16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_f_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_F_I16 + + Inst_VOP3__V_CMP_F_I16::~Inst_VOP3__V_CMP_F_I16() + { + } // ~Inst_VOP3__V_CMP_F_I16 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LT_I16 class methods --- + + Inst_VOP3__V_CMP_LT_I16::Inst_VOP3__V_CMP_LT_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lt_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LT_I16 + + Inst_VOP3__V_CMP_LT_I16::~Inst_VOP3__V_CMP_LT_I16() + { + } // ~Inst_VOP3__V_CMP_LT_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_EQ_I16 class methods --- + + Inst_VOP3__V_CMP_EQ_I16::Inst_VOP3__V_CMP_EQ_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_eq_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_EQ_I16 + + Inst_VOP3__V_CMP_EQ_I16::~Inst_VOP3__V_CMP_EQ_I16() + { + } // ~Inst_VOP3__V_CMP_EQ_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LE_I16 class methods --- + + Inst_VOP3__V_CMP_LE_I16::Inst_VOP3__V_CMP_LE_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_le_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LE_I16 + + Inst_VOP3__V_CMP_LE_I16::~Inst_VOP3__V_CMP_LE_I16() + { + } // ~Inst_VOP3__V_CMP_LE_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GT_I16 class methods --- + + Inst_VOP3__V_CMP_GT_I16::Inst_VOP3__V_CMP_GT_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_gt_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GT_I16 + + Inst_VOP3__V_CMP_GT_I16::~Inst_VOP3__V_CMP_GT_I16() + { + } // ~Inst_VOP3__V_CMP_GT_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NE_I16 class methods --- + + Inst_VOP3__V_CMP_NE_I16::Inst_VOP3__V_CMP_NE_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ne_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_NE_I16 + + Inst_VOP3__V_CMP_NE_I16::~Inst_VOP3__V_CMP_NE_I16() + { + } // ~Inst_VOP3__V_CMP_NE_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GE_I16 class methods --- + + Inst_VOP3__V_CMP_GE_I16::Inst_VOP3__V_CMP_GE_I16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ge_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GE_I16 + + Inst_VOP3__V_CMP_GE_I16::~Inst_VOP3__V_CMP_GE_I16() + { + } // ~Inst_VOP3__V_CMP_GE_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_T_I16 class methods --- + + Inst_VOP3__V_CMP_T_I16::Inst_VOP3__V_CMP_T_I16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_t_i16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_T_I16 + + Inst_VOP3__V_CMP_T_I16::~Inst_VOP3__V_CMP_T_I16() + { + } // ~Inst_VOP3__V_CMP_T_I16 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_F_U16 class methods --- + + Inst_VOP3__V_CMP_F_U16::Inst_VOP3__V_CMP_F_U16(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_f_u16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_F_U16 + + Inst_VOP3__V_CMP_F_U16::~Inst_VOP3__V_CMP_F_U16() + { + } // ~Inst_VOP3__V_CMP_F_U16 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LT_U16 class methods --- + + Inst_VOP3__V_CMP_LT_U16::Inst_VOP3__V_CMP_LT_U16( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lt_u16", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LT_U16 + + Inst_VOP3__V_CMP_LT_U16::~Inst_VOP3__V_CMP_LT_U16() + { + } // ~Inst_VOP3__V_CMP_LT_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
+    void
+    Inst_VOP3__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Unsigned 16-bit less-than; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_EQ_U16 class methods ---
+
+    Inst_VOP3__V_CMP_EQ_U16::Inst_VOP3__V_CMP_EQ_U16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_eq_u16", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_EQ_U16
+
+    Inst_VOP3__V_CMP_EQ_U16::~Inst_VOP3__V_CMP_EQ_U16()
+    {
+    } // ~Inst_VOP3__V_CMP_EQ_U16
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Unsigned 16-bit equality; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] == src1[lane] ?
+                    1 : 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_LE_U16 class methods ---
+
+    Inst_VOP3__V_CMP_LE_U16::Inst_VOP3__V_CMP_LE_U16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_le_u16", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_LE_U16
+
+    Inst_VOP3__V_CMP_LE_U16::~Inst_VOP3__V_CMP_LE_U16()
+    {
+    } // ~Inst_VOP3__V_CMP_LE_U16
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Unsigned 16-bit less-or-equal; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_GT_U16 class methods ---
+
+    Inst_VOP3__V_CMP_GT_U16::Inst_VOP3__V_CMP_GT_U16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_gt_u16", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_GT_U16
+
+    Inst_VOP3__V_CMP_GT_U16::~Inst_VOP3__V_CMP_GT_U16()
+    {
+    } // ~Inst_VOP3__V_CMP_GT_U16
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Unsigned 16-bit greater-than; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_NE_U16 class methods ---
+
+    Inst_VOP3__V_CMP_NE_U16::Inst_VOP3__V_CMP_NE_U16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_ne_u16", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_NE_U16
+
+    Inst_VOP3__V_CMP_NE_U16::~Inst_VOP3__V_CMP_NE_U16()
+    {
+    } // ~Inst_VOP3__V_CMP_NE_U16
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Unsigned 16-bit inequality; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] != src1[lane] ?
+                    1 : 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_GE_U16 class methods ---
+
+    Inst_VOP3__V_CMP_GE_U16::Inst_VOP3__V_CMP_GE_U16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_ge_u16", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_GE_U16
+
+    Inst_VOP3__V_CMP_GE_U16::~Inst_VOP3__V_CMP_GE_U16()
+    {
+    } // ~Inst_VOP3__V_CMP_GE_U16
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Unsigned 16-bit greater-or-equal; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_T_U16 class methods ---
+
+    Inst_VOP3__V_CMP_T_U16::Inst_VOP3__V_CMP_T_U16(InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_t_u16", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_T_U16
+
+    Inst_VOP3__V_CMP_T_U16::~Inst_VOP3__V_CMP_T_U16()
+    {
+    } // ~Inst_VOP3__V_CMP_T_U16
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        // "Always true" compare: unconditionally set the result bit for
+        // every active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, 1);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_F_I16 class methods ---
+
+    Inst_VOP3__V_CMPX_F_I16::Inst_VOP3__V_CMPX_F_I16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_f_i16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_F_I16
+
+    Inst_VOP3__V_CMPX_F_I16::~Inst_VOP3__V_CMPX_F_I16()
+    {
+    } // ~Inst_VOP3__V_CMPX_F_I16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        // "Always false" compare: clear the result bit for every active
+        // lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, 0);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_LT_I16 class methods ---
+
+    Inst_VOP3__V_CMPX_LT_I16::Inst_VOP3__V_CMPX_LT_I16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_lt_i16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_LT_I16
+
+    Inst_VOP3__V_CMPX_LT_I16::~Inst_VOP3__V_CMPX_LT_I16()
+    {
+    } // ~Inst_VOP3__V_CMPX_LT_I16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Signed 16-bit less-than; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_EQ_I16 class methods ---
+
+    Inst_VOP3__V_CMPX_EQ_I16::Inst_VOP3__V_CMPX_EQ_I16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_eq_i16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_EQ_I16
+
+    Inst_VOP3__V_CMPX_EQ_I16::~Inst_VOP3__V_CMPX_EQ_I16()
+    {
+    } // ~Inst_VOP3__V_CMPX_EQ_I16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Signed 16-bit equality; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] == src1[lane] ?
+                    1 : 0);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_LE_I16 class methods ---
+
+    Inst_VOP3__V_CMPX_LE_I16::Inst_VOP3__V_CMPX_LE_I16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_le_i16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_LE_I16
+
+    Inst_VOP3__V_CMPX_LE_I16::~Inst_VOP3__V_CMPX_LE_I16()
+    {
+    } // ~Inst_VOP3__V_CMPX_LE_I16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Signed 16-bit less-or-equal; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_GT_I16 class methods ---
+
+    Inst_VOP3__V_CMPX_GT_I16::Inst_VOP3__V_CMPX_GT_I16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_gt_i16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_GT_I16
+
+    Inst_VOP3__V_CMPX_GT_I16::~Inst_VOP3__V_CMPX_GT_I16()
+    {
+    } // ~Inst_VOP3__V_CMPX_GT_I16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Signed 16-bit greater-than; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_NE_I16 class methods ---
+
+    Inst_VOP3__V_CMPX_NE_I16::Inst_VOP3__V_CMPX_NE_I16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_ne_i16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_NE_I16
+
+    Inst_VOP3__V_CMPX_NE_I16::~Inst_VOP3__V_CMPX_NE_I16()
+    {
+    } // ~Inst_VOP3__V_CMPX_NE_I16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Signed 16-bit inequality; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] != src1[lane] ?
+                    1 : 0);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_GE_I16 class methods ---
+
+    Inst_VOP3__V_CMPX_GE_I16::Inst_VOP3__V_CMPX_GE_I16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_ge_i16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_GE_I16
+
+    Inst_VOP3__V_CMPX_GE_I16::~Inst_VOP3__V_CMPX_GE_I16()
+    {
+    } // ~Inst_VOP3__V_CMPX_GE_I16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Signed 16-bit greater-or-equal; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_T_I16 class methods ---
+
+    Inst_VOP3__V_CMPX_T_I16::Inst_VOP3__V_CMPX_T_I16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_t_i16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_T_I16
+
+    Inst_VOP3__V_CMPX_T_I16::~Inst_VOP3__V_CMPX_T_I16()
+    {
+    } // ~Inst_VOP3__V_CMPX_T_I16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        // "Always true" compare: set the result bit for every active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, 1);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_F_U16 class methods ---
+
+    Inst_VOP3__V_CMPX_F_U16::Inst_VOP3__V_CMPX_F_U16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_f_u16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_F_U16
+
+    Inst_VOP3__V_CMPX_F_U16::~Inst_VOP3__V_CMPX_F_U16()
+    {
+    } // ~Inst_VOP3__V_CMPX_F_U16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        // "Always false" compare: clear the result bit for every active
+        // lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, 0);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_LT_U16 class methods ---
+
+    Inst_VOP3__V_CMPX_LT_U16::Inst_VOP3__V_CMPX_LT_U16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_lt_u16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_LT_U16
+
+    Inst_VOP3__V_CMPX_LT_U16::~Inst_VOP3__V_CMPX_LT_U16()
+    {
+    } // ~Inst_VOP3__V_CMPX_LT_U16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Unsigned 16-bit less-than; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_EQ_U16 class methods ---
+
+    Inst_VOP3__V_CMPX_EQ_U16::Inst_VOP3__V_CMPX_EQ_U16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_eq_u16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_EQ_U16
+
+    Inst_VOP3__V_CMPX_EQ_U16::~Inst_VOP3__V_CMPX_EQ_U16()
+    {
+    } // ~Inst_VOP3__V_CMPX_EQ_U16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        // Unsigned operand views to match the _u16 opcode (previously
+        // declared as ConstVecOperandI16).
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Unsigned 16-bit equality; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] == src1[lane] ?
+                    1 : 0);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_LE_U16 class methods ---
+
+    Inst_VOP3__V_CMPX_LE_U16::Inst_VOP3__V_CMPX_LE_U16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_le_u16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_LE_U16
+
+    Inst_VOP3__V_CMPX_LE_U16::~Inst_VOP3__V_CMPX_LE_U16()
+    {
+    } // ~Inst_VOP3__V_CMPX_LE_U16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        // This is an unsigned compare: the operands must be read as u16.
+        // The previous ConstVecOperandI16 views ordered any value >=
+        // 0x8000 as negative, inverting the <= result for those lanes.
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Unsigned 16-bit less-or-equal; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_GT_U16 class methods ---
+
+    Inst_VOP3__V_CMPX_GT_U16::Inst_VOP3__V_CMPX_GT_U16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_gt_u16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_GT_U16
+
+    Inst_VOP3__V_CMPX_GT_U16::~Inst_VOP3__V_CMPX_GT_U16()
+    {
+    } // ~Inst_VOP3__V_CMPX_GT_U16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        // This is an unsigned compare: the operands must be read as u16.
+        // The previous ConstVecOperandI16 views ordered any value >=
+        // 0x8000 as negative, inverting the > result for those lanes.
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Unsigned 16-bit greater-than; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_NE_U16 class methods ---
+
+    Inst_VOP3__V_CMPX_NE_U16::Inst_VOP3__V_CMPX_NE_U16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_ne_u16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_NE_U16
+
+    Inst_VOP3__V_CMPX_NE_U16::~Inst_VOP3__V_CMPX_NE_U16()
+    {
+    } // ~Inst_VOP3__V_CMPX_NE_U16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        // Unsigned operand views to match the _u16 opcode (previously
+        // declared as ConstVecOperandI16).
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Unsigned 16-bit inequality; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] != src1[lane] ?
+                    1 : 0);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_GE_U16 class methods ---
+
+    Inst_VOP3__V_CMPX_GE_U16::Inst_VOP3__V_CMPX_GE_U16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_ge_u16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_GE_U16
+
+    Inst_VOP3__V_CMPX_GE_U16::~Inst_VOP3__V_CMPX_GE_U16()
+    {
+    } // ~Inst_VOP3__V_CMPX_GE_U16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        // This is an unsigned compare: the operands must be read as u16.
+        // The previous ConstVecOperandI16 views ordered any value >=
+        // 0x8000 as negative, inverting the >= result for those lanes.
+        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Unsigned 16-bit greater-or-equal; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMPX_T_U16 class methods ---
+
+    Inst_VOP3__V_CMPX_T_U16::Inst_VOP3__V_CMPX_T_U16(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmpx_t_u16", true)
+    {
+        setFlag(ALU);
+        setFlag(WritesEXEC);
+    } // Inst_VOP3__V_CMPX_T_U16
+
+    Inst_VOP3__V_CMPX_T_U16::~Inst_VOP3__V_CMPX_T_U16()
+    {
+    } // ~Inst_VOP3__V_CMPX_T_U16
+
+    // --- description from .arch file ---
+    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        // "Always true" compare: set the result bit for every active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, 1);
+            }
+        }
+
+        // CMPX variant: the comparison result also replaces EXEC.
+        wf->execMask() = sdst.rawData();
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_F_I32 class methods ---
+
+    Inst_VOP3__V_CMP_F_I32::Inst_VOP3__V_CMP_F_I32(InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_f_i32", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_F_I32
+
+    Inst_VOP3__V_CMP_F_I32::~Inst_VOP3__V_CMP_F_I32()
+    {
+    } // ~Inst_VOP3__V_CMP_F_I32
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        // "Always false" compare: clear the result bit for every active
+        // lane; no source operands are read.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_LT_I32 class methods ---
+
+    Inst_VOP3__V_CMP_LT_I32::Inst_VOP3__V_CMP_LT_I32(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_lt_i32", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_LT_I32
+
+    Inst_VOP3__V_CMP_LT_I32::~Inst_VOP3__V_CMP_LT_I32()
+    {
+    } // ~Inst_VOP3__V_CMP_LT_I32
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Signed 32-bit less-than; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_EQ_I32 class methods ---
+
+    Inst_VOP3__V_CMP_EQ_I32::Inst_VOP3__V_CMP_EQ_I32(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_eq_i32", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_EQ_I32
+
+    Inst_VOP3__V_CMP_EQ_I32::~Inst_VOP3__V_CMP_EQ_I32()
+    {
+    } // ~Inst_VOP3__V_CMP_EQ_I32
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Signed 32-bit equality; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] == src1[lane] ?
+                    1 : 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_LE_I32 class methods ---
+
+    Inst_VOP3__V_CMP_LE_I32::Inst_VOP3__V_CMP_LE_I32(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_le_i32", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_LE_I32
+
+    Inst_VOP3__V_CMP_LE_I32::~Inst_VOP3__V_CMP_LE_I32()
+    {
+    } // ~Inst_VOP3__V_CMP_LE_I32
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Signed 32-bit less-or-equal; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_GT_I32 class methods ---
+
+    Inst_VOP3__V_CMP_GT_I32::Inst_VOP3__V_CMP_GT_I32(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_gt_i32", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_GT_I32
+
+    Inst_VOP3__V_CMP_GT_I32::~Inst_VOP3__V_CMP_GT_I32()
+    {
+    } // ~Inst_VOP3__V_CMP_GT_I32
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Signed 32-bit greater-than; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_NE_I32 class methods ---
+
+    Inst_VOP3__V_CMP_NE_I32::Inst_VOP3__V_CMP_NE_I32(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_ne_i32", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_NE_I32
+
+    Inst_VOP3__V_CMP_NE_I32::~Inst_VOP3__V_CMP_NE_I32()
+    {
+    } // ~Inst_VOP3__V_CMP_NE_I32
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Signed 32-bit inequality; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] != src1[lane] ?
+                    1 : 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_GE_I32 class methods ---
+
+    Inst_VOP3__V_CMP_GE_I32::Inst_VOP3__V_CMP_GE_I32(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_ge_i32", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_GE_I32
+
+    Inst_VOP3__V_CMP_GE_I32::~Inst_VOP3__V_CMP_GE_I32()
+    {
+    } // ~Inst_VOP3__V_CMP_GE_I32
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+
+        /**
+         * input modifiers are supported by FP operations only
+         */
+        assert(!(instData.ABS & 0x1));
+        assert(!(instData.ABS & 0x2));
+        assert(!(instData.ABS & 0x4));
+        assert(!(extData.NEG & 0x1));
+        assert(!(extData.NEG & 0x2));
+        assert(!(extData.NEG & 0x4));
+
+        // Signed 32-bit greater-or-equal; one result bit per active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_T_I32 class methods ---
+
+    Inst_VOP3__V_CMP_T_I32::Inst_VOP3__V_CMP_T_I32(InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_t_i32", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_T_I32
+
+    Inst_VOP3__V_CMP_T_I32::~Inst_VOP3__V_CMP_T_I32()
+    {
+    } // ~Inst_VOP3__V_CMP_T_I32
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        // "Always true" compare: set the result bit for every active lane.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, 1);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_F_U32 class methods ---
+
+    Inst_VOP3__V_CMP_F_U32::Inst_VOP3__V_CMP_F_U32(InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_f_u32", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_F_U32
+
+    Inst_VOP3__V_CMP_F_U32::~Inst_VOP3__V_CMP_F_U32()
+    {
+    } // ~Inst_VOP3__V_CMP_F_U32
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
+    void
+    Inst_VOP3__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
+
+        // "Always false" compare: clear the result bit for every active
+        // lane; no source operands are read.
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                sdst.setBit(lane, 0);
+            }
+        }
+
+        sdst.write();
+    } // execute
+    // --- Inst_VOP3__V_CMP_LT_U32 class methods ---
+
+    Inst_VOP3__V_CMP_LT_U32::Inst_VOP3__V_CMP_LT_U32(
+          InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cmp_lt_u32", true)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CMP_LT_U32
+
+    Inst_VOP3__V_CMP_LT_U32::~Inst_VOP3__V_CMP_LT_U32()
+    {
+    } // ~Inst_VOP3__V_CMP_LT_U32
+
+    // --- description from .arch file ---
+    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
+ void + Inst_VOP3__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_EQ_U32 class methods --- + + Inst_VOP3__V_CMP_EQ_U32::Inst_VOP3__V_CMP_EQ_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_eq_u32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_EQ_U32 + + Inst_VOP3__V_CMP_EQ_U32::~Inst_VOP3__V_CMP_EQ_U32() + { + } // ~Inst_VOP3__V_CMP_EQ_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LE_U32 class methods --- + + Inst_VOP3__V_CMP_LE_U32::Inst_VOP3__V_CMP_LE_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_le_u32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LE_U32 + + Inst_VOP3__V_CMP_LE_U32::~Inst_VOP3__V_CMP_LE_U32() + { + } // ~Inst_VOP3__V_CMP_LE_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GT_U32 class methods --- + + Inst_VOP3__V_CMP_GT_U32::Inst_VOP3__V_CMP_GT_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_gt_u32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GT_U32 + + Inst_VOP3__V_CMP_GT_U32::~Inst_VOP3__V_CMP_GT_U32() + { + } // ~Inst_VOP3__V_CMP_GT_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NE_U32 class methods --- + + Inst_VOP3__V_CMP_NE_U32::Inst_VOP3__V_CMP_NE_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ne_u32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_NE_U32 + + Inst_VOP3__V_CMP_NE_U32::~Inst_VOP3__V_CMP_NE_U32() + { + } // ~Inst_VOP3__V_CMP_NE_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GE_U32 class methods --- + + Inst_VOP3__V_CMP_GE_U32::Inst_VOP3__V_CMP_GE_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ge_u32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GE_U32 + + Inst_VOP3__V_CMP_GE_U32::~Inst_VOP3__V_CMP_GE_U32() + { + } // ~Inst_VOP3__V_CMP_GE_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_T_U32 class methods --- + + Inst_VOP3__V_CMP_T_U32::Inst_VOP3__V_CMP_T_U32(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_t_u32", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_T_U32 + + Inst_VOP3__V_CMP_T_U32::~Inst_VOP3__V_CMP_T_U32() + { + } // ~Inst_VOP3__V_CMP_T_U32 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_F_I32 class methods --- + + Inst_VOP3__V_CMPX_F_I32::Inst_VOP3__V_CMPX_F_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_f_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_F_I32 + + Inst_VOP3__V_CMPX_F_I32::~Inst_VOP3__V_CMPX_F_I32() + { + } // ~Inst_VOP3__V_CMPX_F_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LT_I32 class methods --- + + Inst_VOP3__V_CMPX_LT_I32::Inst_VOP3__V_CMPX_LT_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lt_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LT_I32 + + Inst_VOP3__V_CMPX_LT_I32::~Inst_VOP3__V_CMPX_LT_I32() + { + } // ~Inst_VOP3__V_CMPX_LT_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_EQ_I32 class methods --- + + Inst_VOP3__V_CMPX_EQ_I32::Inst_VOP3__V_CMPX_EQ_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_eq_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_EQ_I32 + + Inst_VOP3__V_CMPX_EQ_I32::~Inst_VOP3__V_CMPX_EQ_I32() + { + } // ~Inst_VOP3__V_CMPX_EQ_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LE_I32 class methods --- + + Inst_VOP3__V_CMPX_LE_I32::Inst_VOP3__V_CMPX_LE_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_le_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LE_I32 + + Inst_VOP3__V_CMPX_LE_I32::~Inst_VOP3__V_CMPX_LE_I32() + { + } // ~Inst_VOP3__V_CMPX_LE_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GT_I32 class methods --- + + Inst_VOP3__V_CMPX_GT_I32::Inst_VOP3__V_CMPX_GT_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_gt_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GT_I32 + + Inst_VOP3__V_CMPX_GT_I32::~Inst_VOP3__V_CMPX_GT_I32() + { + } // ~Inst_VOP3__V_CMPX_GT_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NE_I32 class methods --- + + Inst_VOP3__V_CMPX_NE_I32::Inst_VOP3__V_CMPX_NE_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ne_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NE_I32 + + Inst_VOP3__V_CMPX_NE_I32::~Inst_VOP3__V_CMPX_NE_I32() + { + } // ~Inst_VOP3__V_CMPX_NE_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GE_I32 class methods --- + + Inst_VOP3__V_CMPX_GE_I32::Inst_VOP3__V_CMPX_GE_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ge_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GE_I32 + + Inst_VOP3__V_CMPX_GE_I32::~Inst_VOP3__V_CMPX_GE_I32() + { + } // ~Inst_VOP3__V_CMPX_GE_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_T_I32 class methods --- + + Inst_VOP3__V_CMPX_T_I32::Inst_VOP3__V_CMPX_T_I32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_t_i32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_T_I32 + + Inst_VOP3__V_CMPX_T_I32::~Inst_VOP3__V_CMPX_T_I32() + { + } // ~Inst_VOP3__V_CMPX_T_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_F_U32 class methods --- + + Inst_VOP3__V_CMPX_F_U32::Inst_VOP3__V_CMPX_F_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_f_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_F_U32 + + Inst_VOP3__V_CMPX_F_U32::~Inst_VOP3__V_CMPX_F_U32() + { + } // ~Inst_VOP3__V_CMPX_F_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LT_U32 class methods --- + + Inst_VOP3__V_CMPX_LT_U32::Inst_VOP3__V_CMPX_LT_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lt_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LT_U32 + + Inst_VOP3__V_CMPX_LT_U32::~Inst_VOP3__V_CMPX_LT_U32() + { + } // ~Inst_VOP3__V_CMPX_LT_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_EQ_U32 class methods --- + + Inst_VOP3__V_CMPX_EQ_U32::Inst_VOP3__V_CMPX_EQ_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_eq_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_EQ_U32 + + Inst_VOP3__V_CMPX_EQ_U32::~Inst_VOP3__V_CMPX_EQ_U32() + { + } // ~Inst_VOP3__V_CMPX_EQ_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LE_U32 class methods --- + + Inst_VOP3__V_CMPX_LE_U32::Inst_VOP3__V_CMPX_LE_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_le_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LE_U32 + + Inst_VOP3__V_CMPX_LE_U32::~Inst_VOP3__V_CMPX_LE_U32() + { + } // ~Inst_VOP3__V_CMPX_LE_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GT_U32 class methods --- + + Inst_VOP3__V_CMPX_GT_U32::Inst_VOP3__V_CMPX_GT_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_gt_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GT_U32 + + Inst_VOP3__V_CMPX_GT_U32::~Inst_VOP3__V_CMPX_GT_U32() + { + } // ~Inst_VOP3__V_CMPX_GT_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NE_U32 class methods --- + + Inst_VOP3__V_CMPX_NE_U32::Inst_VOP3__V_CMPX_NE_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ne_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NE_U32 + + Inst_VOP3__V_CMPX_NE_U32::~Inst_VOP3__V_CMPX_NE_U32() + { + } // ~Inst_VOP3__V_CMPX_NE_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GE_U32 class methods --- + + Inst_VOP3__V_CMPX_GE_U32::Inst_VOP3__V_CMPX_GE_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ge_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GE_U32 + + Inst_VOP3__V_CMPX_GE_U32::~Inst_VOP3__V_CMPX_GE_U32() + { + } // ~Inst_VOP3__V_CMPX_GE_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_T_U32 class methods --- + + Inst_VOP3__V_CMPX_T_U32::Inst_VOP3__V_CMPX_T_U32( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_t_u32", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_T_U32 + + Inst_VOP3__V_CMPX_T_U32::~Inst_VOP3__V_CMPX_T_U32() + { + } // ~Inst_VOP3__V_CMPX_T_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_F_I64 class methods --- + + Inst_VOP3__V_CMP_F_I64::Inst_VOP3__V_CMP_F_I64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_f_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_F_I64 + + Inst_VOP3__V_CMP_F_I64::~Inst_VOP3__V_CMP_F_I64() + { + } // ~Inst_VOP3__V_CMP_F_I64 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LT_I64 class methods --- + + Inst_VOP3__V_CMP_LT_I64::Inst_VOP3__V_CMP_LT_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lt_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LT_I64 + + Inst_VOP3__V_CMP_LT_I64::~Inst_VOP3__V_CMP_LT_I64() + { + } // ~Inst_VOP3__V_CMP_LT_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_EQ_I64 class methods --- + + Inst_VOP3__V_CMP_EQ_I64::Inst_VOP3__V_CMP_EQ_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_eq_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_EQ_I64 + + Inst_VOP3__V_CMP_EQ_I64::~Inst_VOP3__V_CMP_EQ_I64() + { + } // ~Inst_VOP3__V_CMP_EQ_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LE_I64 class methods --- + + Inst_VOP3__V_CMP_LE_I64::Inst_VOP3__V_CMP_LE_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_le_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LE_I64 + + Inst_VOP3__V_CMP_LE_I64::~Inst_VOP3__V_CMP_LE_I64() + { + } // ~Inst_VOP3__V_CMP_LE_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GT_I64 class methods --- + + Inst_VOP3__V_CMP_GT_I64::Inst_VOP3__V_CMP_GT_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_gt_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GT_I64 + + Inst_VOP3__V_CMP_GT_I64::~Inst_VOP3__V_CMP_GT_I64() + { + } // ~Inst_VOP3__V_CMP_GT_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NE_I64 class methods --- + + Inst_VOP3__V_CMP_NE_I64::Inst_VOP3__V_CMP_NE_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ne_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_NE_I64 + + Inst_VOP3__V_CMP_NE_I64::~Inst_VOP3__V_CMP_NE_I64() + { + } // ~Inst_VOP3__V_CMP_NE_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GE_I64 class methods --- + + Inst_VOP3__V_CMP_GE_I64::Inst_VOP3__V_CMP_GE_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ge_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GE_I64 + + Inst_VOP3__V_CMP_GE_I64::~Inst_VOP3__V_CMP_GE_I64() + { + } // ~Inst_VOP3__V_CMP_GE_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_T_I64 class methods --- + + Inst_VOP3__V_CMP_T_I64::Inst_VOP3__V_CMP_T_I64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_t_i64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_T_I64 + + Inst_VOP3__V_CMP_T_I64::~Inst_VOP3__V_CMP_T_I64() + { + } // ~Inst_VOP3__V_CMP_T_I64 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_F_U64 class methods --- + + Inst_VOP3__V_CMP_F_U64::Inst_VOP3__V_CMP_F_U64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_f_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_F_U64 + + Inst_VOP3__V_CMP_F_U64::~Inst_VOP3__V_CMP_F_U64() + { + } // ~Inst_VOP3__V_CMP_F_U64 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LT_U64 class methods --- + + Inst_VOP3__V_CMP_LT_U64::Inst_VOP3__V_CMP_LT_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_lt_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LT_U64 + + Inst_VOP3__V_CMP_LT_U64::~Inst_VOP3__V_CMP_LT_U64() + { + } // ~Inst_VOP3__V_CMP_LT_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_EQ_U64 class methods --- + + Inst_VOP3__V_CMP_EQ_U64::Inst_VOP3__V_CMP_EQ_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_eq_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_EQ_U64 + + Inst_VOP3__V_CMP_EQ_U64::~Inst_VOP3__V_CMP_EQ_U64() + { + } // ~Inst_VOP3__V_CMP_EQ_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_LE_U64 class methods --- + + Inst_VOP3__V_CMP_LE_U64::Inst_VOP3__V_CMP_LE_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_le_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_LE_U64 + + Inst_VOP3__V_CMP_LE_U64::~Inst_VOP3__V_CMP_LE_U64() + { + } // ~Inst_VOP3__V_CMP_LE_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GT_U64 class methods --- + + Inst_VOP3__V_CMP_GT_U64::Inst_VOP3__V_CMP_GT_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_gt_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GT_U64 + + Inst_VOP3__V_CMP_GT_U64::~Inst_VOP3__V_CMP_GT_U64() + { + } // ~Inst_VOP3__V_CMP_GT_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_NE_U64 class methods --- + + Inst_VOP3__V_CMP_NE_U64::Inst_VOP3__V_CMP_NE_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ne_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_NE_U64 + + Inst_VOP3__V_CMP_NE_U64::~Inst_VOP3__V_CMP_NE_U64() + { + } // ~Inst_VOP3__V_CMP_NE_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_GE_U64 class methods --- + + Inst_VOP3__V_CMP_GE_U64::Inst_VOP3__V_CMP_GE_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_ge_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_GE_U64 + + Inst_VOP3__V_CMP_GE_U64::~Inst_VOP3__V_CMP_GE_U64() + { + } // ~Inst_VOP3__V_CMP_GE_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMP_T_U64 class methods --- + + Inst_VOP3__V_CMP_T_U64::Inst_VOP3__V_CMP_T_U64(InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmp_t_u64", true) + { + setFlag(ALU); + } // Inst_VOP3__V_CMP_T_U64 + + Inst_VOP3__V_CMP_T_U64::~Inst_VOP3__V_CMP_T_U64() + { + } // ~Inst_VOP3__V_CMP_T_U64 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_F_I64 class methods --- + + Inst_VOP3__V_CMPX_F_I64::Inst_VOP3__V_CMPX_F_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_f_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_F_I64 + + Inst_VOP3__V_CMPX_F_I64::~Inst_VOP3__V_CMPX_F_I64() + { + } // ~Inst_VOP3__V_CMPX_F_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LT_I64 class methods --- + + Inst_VOP3__V_CMPX_LT_I64::Inst_VOP3__V_CMPX_LT_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lt_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LT_I64 + + Inst_VOP3__V_CMPX_LT_I64::~Inst_VOP3__V_CMPX_LT_I64() + { + } // ~Inst_VOP3__V_CMPX_LT_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_EQ_I64 class methods --- + + Inst_VOP3__V_CMPX_EQ_I64::Inst_VOP3__V_CMPX_EQ_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_eq_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_EQ_I64 + + Inst_VOP3__V_CMPX_EQ_I64::~Inst_VOP3__V_CMPX_EQ_I64() + { + } // ~Inst_VOP3__V_CMPX_EQ_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LE_I64 class methods --- + + Inst_VOP3__V_CMPX_LE_I64::Inst_VOP3__V_CMPX_LE_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_le_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LE_I64 + + Inst_VOP3__V_CMPX_LE_I64::~Inst_VOP3__V_CMPX_LE_I64() + { + } // ~Inst_VOP3__V_CMPX_LE_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GT_I64 class methods --- + + Inst_VOP3__V_CMPX_GT_I64::Inst_VOP3__V_CMPX_GT_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_gt_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GT_I64 + + Inst_VOP3__V_CMPX_GT_I64::~Inst_VOP3__V_CMPX_GT_I64() + { + } // ~Inst_VOP3__V_CMPX_GT_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NE_I64 class methods --- + + Inst_VOP3__V_CMPX_NE_I64::Inst_VOP3__V_CMPX_NE_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ne_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NE_I64 + + Inst_VOP3__V_CMPX_NE_I64::~Inst_VOP3__V_CMPX_NE_I64() + { + } // ~Inst_VOP3__V_CMPX_NE_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GE_I64 class methods --- + + Inst_VOP3__V_CMPX_GE_I64::Inst_VOP3__V_CMPX_GE_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ge_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GE_I64 + + Inst_VOP3__V_CMPX_GE_I64::~Inst_VOP3__V_CMPX_GE_I64() + { + } // ~Inst_VOP3__V_CMPX_GE_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_T_I64 class methods --- + + Inst_VOP3__V_CMPX_T_I64::Inst_VOP3__V_CMPX_T_I64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_t_i64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_T_I64 + + Inst_VOP3__V_CMPX_T_I64::~Inst_VOP3__V_CMPX_T_I64() + { + } // ~Inst_VOP3__V_CMPX_T_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_F_U64 class methods --- + + Inst_VOP3__V_CMPX_F_U64::Inst_VOP3__V_CMPX_F_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_f_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_F_U64 + + Inst_VOP3__V_CMPX_F_U64::~Inst_VOP3__V_CMPX_F_U64() + { + } // ~Inst_VOP3__V_CMPX_F_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LT_U64 class methods --- + + Inst_VOP3__V_CMPX_LT_U64::Inst_VOP3__V_CMPX_LT_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_lt_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LT_U64 + + Inst_VOP3__V_CMPX_LT_U64::~Inst_VOP3__V_CMPX_LT_U64() + { + } // ~Inst_VOP3__V_CMPX_LT_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_EQ_U64 class methods --- + + Inst_VOP3__V_CMPX_EQ_U64::Inst_VOP3__V_CMPX_EQ_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_eq_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_EQ_U64 + + Inst_VOP3__V_CMPX_EQ_U64::~Inst_VOP3__V_CMPX_EQ_U64() + { + } // ~Inst_VOP3__V_CMPX_EQ_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] == src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_LE_U64 class methods --- + + Inst_VOP3__V_CMPX_LE_U64::Inst_VOP3__V_CMPX_LE_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_le_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_LE_U64 + + Inst_VOP3__V_CMPX_LE_U64::~Inst_VOP3__V_CMPX_LE_U64() + { + } // ~Inst_VOP3__V_CMPX_LE_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GT_U64 class methods --- + + Inst_VOP3__V_CMPX_GT_U64::Inst_VOP3__V_CMPX_GT_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_gt_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GT_U64 + + Inst_VOP3__V_CMPX_GT_U64::~Inst_VOP3__V_CMPX_GT_U64() + { + } // ~Inst_VOP3__V_CMPX_GT_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_NE_U64 class methods --- + + Inst_VOP3__V_CMPX_NE_U64::Inst_VOP3__V_CMPX_NE_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ne_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_NE_U64 + + Inst_VOP3__V_CMPX_NE_U64::~Inst_VOP3__V_CMPX_NE_U64() + { + } // ~Inst_VOP3__V_CMPX_NE_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_GE_U64 class methods --- + + Inst_VOP3__V_CMPX_GE_U64::Inst_VOP3__V_CMPX_GE_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_ge_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_GE_U64 + + Inst_VOP3__V_CMPX_GE_U64::~Inst_VOP3__V_CMPX_GE_U64() + { + } // ~Inst_VOP3__V_CMPX_GE_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOP3__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute + // --- Inst_VOP3__V_CMPX_T_U64 class methods --- + + Inst_VOP3__V_CMPX_T_U64::Inst_VOP3__V_CMPX_T_U64( + InFmt_VOP3A *iFmt) + : Inst_VOP3A(iFmt, "v_cmpx_t_u64", true) + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOP3__V_CMPX_T_U64 + + Inst_VOP3__V_CMPX_T_U64::~Inst_VOP3__V_CMPX_T_U64() + { + } // ~Inst_VOP3__V_CMPX_T_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOP3__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 sdst(gpuDynInst, instData.VDST); + + /** + * input modifiers are supported by FP operations only + */ + assert(!(instData.ABS & 0x1)); + assert(!(instData.ABS & 0x2)); + assert(!(instData.ABS & 0x4)); + assert(!(extData.NEG & 0x1)); + assert(!(extData.NEG & 0x2)); + assert(!(extData.NEG & 0x4)); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + sdst.setBit(lane, 1); + } + } + + wf->execMask() = sdst.rawData(); + sdst.write(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/vop3p.cc b/src/arch/amdgpu/vega/insts/vop3p.cc new file mode 100644 index 0000000000..96c296df67 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/vop3p.cc @@ -0,0 +1,913 @@ +/* + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/amdgpu/vega/insts/vop3p.hh" + +#include "arch/amdgpu/vega/insts/instructions.hh" +#include "arch/arm/insts/fplib.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + +using half = uint16_t; + +// Helper functions +template +int32_t +dotClampI(int32_t value, bool clamp) +{ + // Only valid for N < 32 + static_assert(N < 32); + + if (!clamp) { + return static_cast(value); + } + + int32_t min = -(1 << (N - 1)); + int32_t max = (1 << (N - 1)) - 1; + return std::clamp(value, min, max); +} + +template +uint32_t +dotClampU(uint32_t value, bool clamp) +{ + // Only valid for N < 32 + static_assert(N < 32); + + if (!clamp) { + return static_cast(value); + } + + uint32_t min = 0; + uint32_t max = (1 << N) - 1; + return std::clamp(value, min, max); +} + +int16_t +clampI16(int32_t value, bool clamp) +{ + if (!clamp) { + return static_cast(value); + } + + return std::clamp(value, + static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::max())); +} + +uint16_t +clampU16(uint32_t value, bool clamp) +{ + if (!clamp) { + return static_cast(value); + } + + return std::clamp(value, + static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::max())); +} + +uint16_t +clampF16(uint16_t value, bool clamp) +{ + if (!clamp) { + return value; + } + + // Values of one and zero in fp16. 
+ constexpr uint16_t one = 0x3c00; + constexpr uint16_t zero = 0x0; + ArmISA::FPSCR fpscr1, fpscr2; + + // If value > one, set to one, then if value < zero set to zero. + uint16_t imm = fplibMin(value, one, fpscr1); + return fplibMax(imm, zero, fpscr2); +} + +float +clampF32(float value, bool clamp) +{ + if (!clamp) { + return value; + } + + return std::clamp(value, 0.0f, 1.0f); +} + + + + +// Begin instruction execute definitions +void Inst_VOP3P__V_PK_MAD_I16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = + [](int16_t S0, int16_t S1, int16_t S2, bool clamp) -> int16_t + { + return clampI16(S0 * S1 + S2, clamp); + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void +Inst_VOP3P__V_PK_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](uint16_t S0, uint16_t S1, bool) -> uint16_t + { + // Only return lower 16 bits of result - This operation cannot clamp. + uint32_t D = S0 * S1; + uint16_t Dh = D & 0xFFFF; + return Dh; + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_ADD_I16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](int16_t S0, int16_t S1, bool clamp) -> int16_t + { + return clampI16(S0 + S1, clamp); + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_SUB_I16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](int16_t S0, int16_t S1, bool clamp) -> int16_t + { + return clampI16(S0 - S1, clamp); + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](uint16_t S0, uint16_t S1, bool) -> uint16_t + { + unsigned shift_val = bits(S0, 3, 0); + + // Shift does not clamp + return S1 << shift_val; + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](uint16_t S0, uint16_t S1, bool) -> uint16_t + { + unsigned shift_val = bits(S0, 3, 0); + + return S1 >> shift_val; + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void 
Inst_VOP3P__V_PK_ASHRREV_B16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](int16_t S0, int16_t S1, bool clamp) -> int16_t + { + // Sign extend to larger type to ensure we don't lose sign bits when + // shifting. + int32_t S1e = S1; + unsigned shift_val = bits(S0, 3, 0); + + return S1e >> shift_val; + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_MAX_I16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](int16_t S0, int16_t S1, bool clamp) -> int16_t + { + return clampI16((S0 >= S1) ? S0 : S1, clamp); + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_MIN_I16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](int16_t S0, int16_t S1, bool clamp) -> int16_t + { + return clampI16((S0 < S1) ? S0 : S1, clamp); + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_MAD_U16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = + [](uint16_t S0, uint16_t S1, uint16_t S2, bool clamp) -> uint16_t + { + return clampU16(S0 * S1 + S2, clamp); + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_ADD_U16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](uint16_t S0, uint16_t S1, bool clamp) -> uint16_t + { + return clampU16(S0 + S1, clamp); + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_SUB_U16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](uint16_t S0, uint16_t S1, bool clamp) -> uint16_t + { + return clampU16(S0 - S1, clamp); + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_MAX_U16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](uint16_t S0, uint16_t S1, bool clamp) -> uint16_t + { + return clampU16((S0 >= S1) ? S0 : S1, clamp); + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_MIN_U16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](uint16_t S0, uint16_t S1, bool clamp) -> uint16_t + { + return clampU16((S0 < S1) ? 
S0 : S1, clamp); + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_FMA_F16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](half S0, half S1, half S2, bool clamp) -> half + { + ArmISA::FPSCR fpscr; + return clampF16(fplibMulAdd(S2, S0, S1, fpscr), clamp); + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_ADD_F16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](half S0, half S1, bool clamp) -> half + { + ArmISA::FPSCR fpscr; + return clampF16(fplibAdd(S0, S1, fpscr), clamp); + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_MUL_F16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](half S0, half S1, bool clamp) -> half + { + ArmISA::FPSCR fpscr; + return clampF16(fplibMul(S0, S1, fpscr), clamp); + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_MIN_F16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](half S0, half S1, bool clamp) -> half + { + ArmISA::FPSCR fpscr; + return clampF16(fplibMin(S0, S1, fpscr), clamp); + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_PK_MAX_F16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = [](half S0, half S1, bool clamp) -> half + { + ArmISA::FPSCR fpscr; + return clampF16(fplibMax(S0, S1, fpscr), clamp); + }; + + vop3pHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_DOT2_F32_F16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = + [](uint32_t S0r, uint32_t S1r, uint32_t S2r, bool clamp) -> uint32_t + { + constexpr unsigned INBITS = 16; + + constexpr unsigned elems = 32 / INBITS; + half S0[elems]; + half S1[elems]; + + for (int i = 0; i < elems; ++i) { + S0[i] = bits(S0r, i*INBITS+INBITS-1, i*INBITS); + S1[i] = bits(S1r, i*INBITS+INBITS-1, i*INBITS); + } + + float S2 = *reinterpret_cast(&S2r); + + // Compute components individually to prevent overflow across packing + half C[elems]; + float Csum = 0.0f; + + for (int i = 0; i < elems; ++i) { + ArmISA::FPSCR fpscr; + C[i] = fplibMul(S0[i], S1[i], 
fpscr); + uint32_t conv = + ArmISA::fplibConvert( + C[i], ArmISA::FPRounding_TIEEVEN, fpscr); + Csum += clampF32(*reinterpret_cast(&conv), clamp); + } + + Csum += S2; + uint32_t rv = *reinterpret_cast(&Csum); + + return rv; + }; + + dotHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_DOT2_I32_I16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = + [](uint32_t S0r, uint32_t S1r, uint32_t S2r, bool clamp) -> uint32_t + { + constexpr unsigned INBITS = 16; + + constexpr unsigned elems = 32 / INBITS; + uint32_t S0[elems]; + uint32_t S1[elems]; + + for (int i = 0; i < elems; ++i) { + S0[i] = bits(S0r, i*INBITS+INBITS-1, i*INBITS); + S1[i] = bits(S1r, i*INBITS+INBITS-1, i*INBITS); + } + + int32_t S2 = *reinterpret_cast(&S2r); + + // Compute components individually to prevent overflow across packing + int32_t C[elems]; + int32_t Csum = 0; + + for (int i = 0; i < elems; ++i) { + C[i] = sext(S0[i]) * sext(S1[i]); + C[i] = sext(dotClampI(C[i], clamp) & mask(INBITS)); + Csum += C[i]; + } + + Csum += S2; + uint32_t rv = *reinterpret_cast(&Csum); + + return rv; + }; + + dotHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_DOT2_U32_U16::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = + [](uint32_t S0r, uint32_t S1r, uint32_t S2, bool clamp) -> uint32_t + { + constexpr unsigned INBITS = 16; + + constexpr unsigned elems = 32 / INBITS; + uint32_t S0[elems]; + uint32_t S1[elems]; + + for (int i = 0; i < elems; ++i) { + S0[i] = bits(S0r, i*INBITS+INBITS-1, i*INBITS); + S1[i] = bits(S1r, i*INBITS+INBITS-1, i*INBITS); + } + + // Compute components individually to prevent overflow across packing + uint32_t C[elems]; + uint32_t Csum = 0; + + for (int i = 0; i < elems; ++i) { + C[i] = S0[i] * S1[i]; + C[i] = dotClampU(C[i], clamp); + Csum += C[i]; + } + + Csum += S2; + + return Csum; + }; + + dotHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_DOT4_I32_I8::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = + [](uint32_t S0r, uint32_t S1r, uint32_t S2r, bool clamp) -> 
uint32_t + { + constexpr unsigned INBITS = 8; + + constexpr unsigned elems = 32 / INBITS; + uint32_t S0[elems]; + uint32_t S1[elems]; + + for (int i = 0; i < elems; ++i) { + S0[i] = bits(S0r, i*INBITS+INBITS-1, i*INBITS); + S1[i] = bits(S1r, i*INBITS+INBITS-1, i*INBITS); + } + + int32_t S2 = *reinterpret_cast(&S2r); + + // Compute components individually to prevent overflow across packing + int32_t C[elems]; + int32_t Csum = 0; + + for (int i = 0; i < elems; ++i) { + C[i] = sext(S0[i]) * sext(S1[i]); + C[i] = sext(dotClampI(C[i], clamp) & mask(INBITS)); + Csum += C[i]; + } + + Csum += S2; + uint32_t rv = *reinterpret_cast(&Csum); + + return rv; + }; + + dotHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_DOT4_U32_U8::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = + [](uint32_t S0r, uint32_t S1r, uint32_t S2, bool clamp) -> uint32_t + { + constexpr unsigned INBITS = 8; + + constexpr unsigned elems = 32 / INBITS; + uint32_t S0[elems]; + uint32_t S1[elems]; + + for (int i = 0; i < elems; ++i) { + S0[i] = bits(S0r, i*INBITS+INBITS-1, i*INBITS); + S1[i] = bits(S1r, i*INBITS+INBITS-1, i*INBITS); + } + + // Compute components individually to prevent overflow across packing + uint32_t C[elems]; + uint32_t Csum = 0; + + for (int i = 0; i < elems; ++i) { + C[i] = S0[i] * S1[i]; + C[i] = dotClampU(C[i], clamp); + Csum += C[i]; + } + + Csum += S2; + + return Csum; + }; + + dotHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_DOT8_I32_I4::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = + [](uint32_t S0r, uint32_t S1r, uint32_t S2r, bool clamp) -> uint32_t + { + constexpr unsigned INBITS = 4; + + constexpr unsigned elems = 32 / INBITS; + uint32_t S0[elems]; + uint32_t S1[elems]; + + for (int i = 0; i < elems; ++i) { + S0[i] = bits(S0r, i*INBITS+INBITS-1, i*INBITS); + S1[i] = bits(S1r, i*INBITS+INBITS-1, i*INBITS); + } + + int32_t S2 = *reinterpret_cast(&S2r); + + // Compute components individually to prevent overflow across packing + int32_t C[elems]; + int32_t 
Csum = 0; + + for (int i = 0; i < elems; ++i) { + C[i] = sext(S0[i]) * sext(S1[i]); + C[i] = sext(dotClampI(C[i], clamp) & mask(INBITS)); + Csum += C[i]; + } + + Csum += S2; + uint32_t rv = *reinterpret_cast(&Csum); + + return rv; + }; + + dotHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_DOT8_U32_U4::execute(GPUDynInstPtr gpuDynInst) +{ + auto opImpl = + [](uint32_t S0r, uint32_t S1r, uint32_t S2, bool clamp) -> uint32_t + { + constexpr unsigned INBITS = 4; + + constexpr unsigned elems = 32 / INBITS; + uint32_t S0[elems]; + uint32_t S1[elems]; + + for (int i = 0; i < elems; ++i) { + S0[i] = bits(S0r, i*INBITS+INBITS-1, i*INBITS); + S1[i] = bits(S1r, i*INBITS+INBITS-1, i*INBITS); + } + + // Compute components individually to prevent overflow across packing + uint32_t C[elems]; + uint32_t Csum = 0; + + for (int i = 0; i < elems; ++i) { + C[i] = S0[i] * S1[i]; + C[i] = dotClampU(C[i], clamp); + Csum += C[i]; + } + + Csum += S2; + + return Csum; + }; + + dotHelper(gpuDynInst, opImpl); +} + +void Inst_VOP3P__V_ACCVGPR_READ::execute(GPUDynInstPtr gpuDynInst) +{ + Wavefront *wf = gpuDynInst->wavefront(); + unsigned accum_offset = wf->accumOffset; + + ConstVecOperandU32 src(gpuDynInst, extData.SRC0+accum_offset); + VecOperandU32 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src[lane]; + } + } + + vdst.write(); +} + +void Inst_VOP3P__V_ACCVGPR_WRITE::execute(GPUDynInstPtr gpuDynInst) +{ + Wavefront *wf = gpuDynInst->wavefront(); + unsigned accum_offset = wf->accumOffset; + + ConstVecOperandU32 src(gpuDynInst, extData.SRC0); + VecOperandU32 vdst(gpuDynInst, instData.VDST+accum_offset); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src[lane]; + } + } + + vdst.write(); +} + +// --- Inst_VOP3P__V_PK_FMA_F32 class methods --- + 
+Inst_VOP3P__V_PK_FMA_F32::Inst_VOP3P__V_PK_FMA_F32(InFmt_VOP3P *iFmt) + : Inst_VOP3P(iFmt, "v_pk_fma_f32") +{ + setFlag(ALU); +} // Inst_VOP3P__V_PK_FMA_F32 + +Inst_VOP3P__V_PK_FMA_F32::~Inst_VOP3P__V_PK_FMA_F32() +{ +} // ~Inst_VOP3P__V_PK_FMA_F32 + +// D.f[63:32] = S0.f[63:32] * S1.f[63:32] + S2.f[63:32] . D.f[31:0] = +// S0.f[31:0] * S1.f[31:0] + S2.f[31:0] . +void +Inst_VOP3P__V_PK_FMA_F32::execute(GPUDynInstPtr gpuDynInst) +{ + // This is a special case of packed instructions which operates on + // 64-bit inputs/outputs and not 32-bit. U64 is used here as float + // values cannot use bitwise operations. Consider the U64 to imply + // untyped 64-bits of data. + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + ConstVecOperandU64 src2(gpuDynInst, extData.SRC2); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + src2.readSrc(); + + int opsel = instData.OPSEL; + int opsel_hi = extData.OPSEL_HI | (instData.OPSEL_HI2 << 2); + + int neg = extData.NEG; + int neg_hi = instData.NEG_HI; + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + uint32_t s0l = (opsel & 1) ? bits(src0[lane], 63, 32) + : bits(src0[lane], 31, 0); + uint32_t s1l = (opsel & 2) ? bits(src1[lane], 63, 32) + : bits(src1[lane], 31, 0); + uint32_t s2l = (opsel & 4) ? bits(src2[lane], 63, 32) + : bits(src2[lane], 31, 0); + + float s0lf = *reinterpret_cast(&s0l); + float s1lf = *reinterpret_cast(&s1l); + float s2lf = *reinterpret_cast(&s2l); + + if (neg & 1) s0lf = -s0lf; + if (neg & 1) s1lf = -s1lf; + if (neg & 1) s2lf = -s2lf; + + float dword1 = std::fma(s0lf, s1lf, s2lf); + + uint32_t s0h = (opsel_hi & 1) ? bits(src0[lane], 63, 32) + : bits(src0[lane], 31, 0); + uint32_t s1h = (opsel_hi & 2) ? bits(src1[lane], 63, 32) + : bits(src1[lane], 31, 0); + uint32_t s2h = (opsel_hi & 4) ? 
bits(src2[lane], 63, 32) + : bits(src2[lane], 31, 0); + + float s0hf = *reinterpret_cast(&s0h); + float s1hf = *reinterpret_cast(&s1h); + float s2hf = *reinterpret_cast(&s2h); + + if (neg_hi & 1) s0hf = -s0hf; + if (neg_hi & 1) s1hf = -s1hf; + if (neg_hi & 1) s2hf = -s2hf; + + float dword2 = std::fma(s0hf, s1hf, s2hf); + + uint32_t result1 = *reinterpret_cast(&dword1); + uint32_t result2 = *reinterpret_cast(&dword2); + + vdst[lane] = (static_cast(result2) << 32) | result1; + } + } + + vdst.write(); +} // execute +// --- Inst_VOP3P__V_PK_MUL_F32 class methods --- + +Inst_VOP3P__V_PK_MUL_F32::Inst_VOP3P__V_PK_MUL_F32(InFmt_VOP3P *iFmt) + : Inst_VOP3P(iFmt, "v_pk_mul_f32") +{ + setFlag(ALU); +} // Inst_VOP3P__V_PK_MUL_F32 + +Inst_VOP3P__V_PK_MUL_F32::~Inst_VOP3P__V_PK_MUL_F32() +{ +} // ~Inst_VOP3P__V_PK_MUL_F32 + +// D.f[63:32] = S0.f[63:32] * S1.f[63:32] . D.f[31:0] = S0.f[31:0] * +// S1.f[31:0] +void +Inst_VOP3P__V_PK_MUL_F32::execute(GPUDynInstPtr gpuDynInst) +{ + // This is a special case of packed instructions which operates on + // 64-bit inputs/outputs and not 32-bit. U64 is used here as float + // values cannot use bitwise operations. Consider the U64 to imply + // untyped 64-bits of data. + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + int opsel = instData.OPSEL; + int opsel_hi = extData.OPSEL_HI; + + int neg = extData.NEG; + int neg_hi = instData.NEG_HI; + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + uint32_t lower_dword = (opsel & 1) ? bits(src0[lane], 63, 32) + : bits(src0[lane], 31, 0); + uint32_t upper_dword = (opsel & 2) ? 
bits(src1[lane], 63, 32) + : bits(src1[lane], 31, 0); + + float ldwordf = *reinterpret_cast(&lower_dword); + float udwordf = *reinterpret_cast(&upper_dword); + + if (neg & 1) ldwordf = -ldwordf; + if (neg & 2) udwordf = -udwordf; + + float dword1 = ldwordf * udwordf; + + lower_dword = (opsel_hi & 1) ? bits(src0[lane], 63, 32) + : bits(src0[lane], 31, 0); + upper_dword = (opsel_hi & 2) ? bits(src1[lane], 63, 32) + : bits(src1[lane], 31, 0); + + ldwordf = *reinterpret_cast(&lower_dword); + udwordf = *reinterpret_cast(&upper_dword); + + if (neg_hi & 1) ldwordf = -ldwordf; + if (neg_hi & 2) udwordf = -udwordf; + + float dword2 = ldwordf * udwordf; + + uint32_t result1 = *reinterpret_cast(&dword1); + uint32_t result2 = *reinterpret_cast(&dword2); + + vdst[lane] = (static_cast(result2) << 32) | result1; + } + } + + vdst.write(); +} // execute +// --- Inst_VOP3P__V_PK_ADD_F32 class methods --- + +Inst_VOP3P__V_PK_ADD_F32::Inst_VOP3P__V_PK_ADD_F32(InFmt_VOP3P *iFmt) + : Inst_VOP3P(iFmt, "v_pk_add_f32") +{ + setFlag(ALU); +} // Inst_VOP3P__V_PK_ADD_F32 + +Inst_VOP3P__V_PK_ADD_F32::~Inst_VOP3P__V_PK_ADD_F32() +{ +} // ~Inst_VOP3P__V_PK_ADD_F32 + +// D.f[63:32] = S0.f[63:32] + S1.f[63:32] . D.f[31:0] = S0.f[31:0] + +// S1.f[31:0] +void +Inst_VOP3P__V_PK_ADD_F32::execute(GPUDynInstPtr gpuDynInst) +{ + // This is a special case of packed instructions which operates on + // 64-bit inputs/outputs and not 32-bit. U64 is used here as float + // values cannot use bitwise operations. Consider the U64 to imply + // untyped 64-bits of data. 
+ Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode); + panic_if(isDPPInst(), "DPP not supported for %s", _opcode); + + int opsel = instData.OPSEL; + int opsel_hi = extData.OPSEL_HI; + + int neg = extData.NEG; + int neg_hi = instData.NEG_HI; + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + uint32_t lower_dword = (opsel & 1) ? bits(src0[lane], 63, 32) + : bits(src0[lane], 31, 0); + uint32_t upper_dword = (opsel & 2) ? bits(src1[lane], 63, 32) + : bits(src1[lane], 31, 0); + + float ldwordf = *reinterpret_cast(&lower_dword); + float udwordf = *reinterpret_cast(&upper_dword); + + if (neg & 1) ldwordf = -ldwordf; + if (neg & 2) udwordf = -udwordf; + + float dword1 = ldwordf + udwordf; + + lower_dword = (opsel_hi & 1) ? bits(src0[lane], 63, 32) + : bits(src0[lane], 31, 0); + upper_dword = (opsel_hi & 2) ? bits(src1[lane], 63, 32) + : bits(src1[lane], 31, 0); + + ldwordf = *reinterpret_cast(&lower_dword); + udwordf = *reinterpret_cast(&upper_dword); + + if (neg_hi & 1) ldwordf = -ldwordf; + if (neg_hi & 2) udwordf = -udwordf; + + float dword2 = ldwordf + udwordf; + + uint32_t result1 = *reinterpret_cast(&dword1); + uint32_t result2 = *reinterpret_cast(&dword2); + + vdst[lane] = (static_cast(result2) << 32) | result1; + } + } + + vdst.write(); +} // execute +// --- Inst_VOP3P__V_PK_MOV_B32 class methods --- + +Inst_VOP3P__V_PK_MOV_B32::Inst_VOP3P__V_PK_MOV_B32(InFmt_VOP3P *iFmt) + : Inst_VOP3P(iFmt, "v_pk_mov_b32") +{ + setFlag(ALU); +} // Inst_VOP3P__V_PK_MOV_B32 + +Inst_VOP3P__V_PK_MOV_B32::~Inst_VOP3P__V_PK_MOV_B32() +{ +} // ~Inst_VOP3P__V_PK_MOV_B32 + +// D.u[63:32] = S1.u[31:0]; D.u[31:0] = S0.u[31:0]. 
+void +Inst_VOP3P__V_PK_MOV_B32::execute(GPUDynInstPtr gpuDynInst) +{ + // This is a special case of packed instructions which operates on + // 64-bit inputs/outputs and not 32-bit. + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, extData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, extData.SRC1); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src0.readSrc(); + src1.readSrc(); + + // Only OPSEL[1:0] are used + // OPSEL[0] 0/1: Lower dest dword = lower/upper dword of src0 + int opsel = instData.OPSEL; + + warn_if(instData.NEG_HI || extData.NEG, + "Negative modifier undefined for %s", _opcode); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + // OPSEL[1] 0/1: Lower dest dword = lower/upper dword of src1 + uint64_t lower_dword = (opsel & 1) ? bits(src0[lane], 63, 32) + : bits(src0[lane], 31, 0); + uint64_t upper_dword = (opsel & 2) ? bits(src1[lane], 63, 32) + : bits(src1[lane], 31, 0); + + vdst[lane] = upper_dword << 32 | lower_dword; + } + } + + vdst.write(); +} // execute + +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/insts/vop3p.hh b/src/arch/amdgpu/vega/insts/vop3p.hh new file mode 100644 index 0000000000..fbb81f12f7 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/vop3p.hh @@ -0,0 +1,441 @@ +/* + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_VEGA_INSTS_VOP3P_HH__ +#define __ARCH_VEGA_INSTS_VOP3P_HH__ + +#include "arch/amdgpu/vega/gpu_decoder.hh" +#include "arch/amdgpu/vega/insts/gpu_static_inst.hh" +#include "arch/amdgpu/vega/insts/op_encodings.hh" +#include "debug/VEGA.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // One source operand + class Inst_VOP3P__1OP : public Inst_VOP3P + { + public: + Inst_VOP3P__1OP(InFmt_VOP3P *iFmt, const std::string& name) + : Inst_VOP3P(iFmt, name) + { + setFlag(ALU); + } + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 1; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: // src + return 4; + case 1: // dst + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } + + }; + + // Two source operands with two 16-bit values in a dword + class 
Inst_VOP3P__2OP_X16 : public Inst_VOP3P + { + public: + Inst_VOP3P__2OP_X16(InFmt_VOP3P *iFmt, const std::string& name) + : Inst_VOP3P(iFmt, name) + { + setFlag(ALU); + } + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 2; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: // src0 + return 4; + case 1: // src1 + return 4; + case 2: // dst + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } + + }; + + // Three source operands with two 16-bit values in a dword + class Inst_VOP3P__3OP_X16 : public Inst_VOP3P + { + public: + Inst_VOP3P__3OP_X16(InFmt_VOP3P *iFmt, const std::string& name) + : Inst_VOP3P(iFmt, name) + { + setFlag(ALU); + } + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 3; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: // src0 + return 4; + case 1: // src1 + return 4; + case 2: // src2 + return 4; + case 3: // dst + return 4; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } + + }; + + + + // Begin instruction implementations + class Inst_VOP3P__V_PK_MAD_I16 : public Inst_VOP3P__3OP_X16 + { + public: + Inst_VOP3P__V_PK_MAD_I16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__3OP_X16(iFmt, "v_pk_mad_i16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_MUL_LO_U16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_MUL_LO_U16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__2OP_X16(iFmt, "v_pk_mul_lo_u16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_ADD_I16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_ADD_I16(InFmt_VOP3P *iFmt) + 
: Inst_VOP3P__2OP_X16(iFmt, "v_pk_add_i16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_SUB_I16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_SUB_I16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__2OP_X16(iFmt, "v_pk_sub_i16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_LSHLREV_B16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_LSHLREV_B16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__2OP_X16(iFmt, "v_pk_lshlrev_b16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_LSHRREV_B16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_LSHRREV_B16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__2OP_X16(iFmt, "v_pk_lshrrev_b16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_ASHRREV_B16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_ASHRREV_B16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__2OP_X16(iFmt, "v_pk_ashrrev_b16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_MAX_I16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_MAX_I16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__2OP_X16(iFmt, "v_pk_max_i16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_MIN_I16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_MIN_I16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__2OP_X16(iFmt, "v_pk_min_i16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_MAD_U16 : public Inst_VOP3P__3OP_X16 + { + public: + Inst_VOP3P__V_PK_MAD_U16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__3OP_X16(iFmt, "v_pk_mad_u16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_ADD_U16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_ADD_U16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__2OP_X16(iFmt, "v_pk_add_u16") + { } + + void execute(GPUDynInstPtr 
gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_SUB_U16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_SUB_U16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__2OP_X16(iFmt, "v_pk_sub_u16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_MAX_U16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_MAX_U16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__2OP_X16(iFmt, "v_pk_max_u16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_MIN_U16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_MIN_U16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__2OP_X16(iFmt, "v_pk_min_u16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_FMA_F16 : public Inst_VOP3P__3OP_X16 + { + public: + Inst_VOP3P__V_PK_FMA_F16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__3OP_X16(iFmt, "v_pk_fma_f16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_ADD_F16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_ADD_F16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__2OP_X16(iFmt, "v_pk_add_f16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_MUL_F16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_MUL_F16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__2OP_X16(iFmt, "v_pk_mul_f16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_MIN_F16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_MIN_F16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__2OP_X16(iFmt, "v_pk_min_f16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_PK_MAX_F16 : public Inst_VOP3P__2OP_X16 + { + public: + Inst_VOP3P__V_PK_MAX_F16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__2OP_X16(iFmt, "v_pk_max_f16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_DOT2_F32_F16 : public Inst_VOP3P__3OP_X16 + { + public: + 
Inst_VOP3P__V_DOT2_F32_F16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__3OP_X16(iFmt, "v_dot2_f32_f16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_DOT2_I32_I16 : public Inst_VOP3P__3OP_X16 + { + public: + Inst_VOP3P__V_DOT2_I32_I16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__3OP_X16(iFmt, "v_dot2_i32_i16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_DOT2_U32_U16 : public Inst_VOP3P__3OP_X16 + { + public: + Inst_VOP3P__V_DOT2_U32_U16(InFmt_VOP3P *iFmt) + : Inst_VOP3P__3OP_X16(iFmt, "v_dot2_u32_u16") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_DOT4_I32_I8 : public Inst_VOP3P__3OP_X16 + { + public: + Inst_VOP3P__V_DOT4_I32_I8(InFmt_VOP3P *iFmt) + : Inst_VOP3P__3OP_X16(iFmt, "v_dot4_i32_i8") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_DOT4_U32_U8 : public Inst_VOP3P__3OP_X16 + { + public: + Inst_VOP3P__V_DOT4_U32_U8(InFmt_VOP3P *iFmt) + : Inst_VOP3P__3OP_X16(iFmt, "v_dot4_u32_u8") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_DOT8_I32_I4 : public Inst_VOP3P__3OP_X16 + { + public: + Inst_VOP3P__V_DOT8_I32_I4(InFmt_VOP3P *iFmt) + : Inst_VOP3P__3OP_X16(iFmt, "v_dot8_i32_i4") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_DOT8_U32_U4 : public Inst_VOP3P__3OP_X16 + { + public: + Inst_VOP3P__V_DOT8_U32_U4(InFmt_VOP3P *iFmt) + : Inst_VOP3P__3OP_X16(iFmt, "v_dot8_u32_u4") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_ACCVGPR_READ : public Inst_VOP3P__1OP + { + public: + Inst_VOP3P__V_ACCVGPR_READ(InFmt_VOP3P *iFmt) + : Inst_VOP3P__1OP(iFmt, "v_accvgpr_read") + { } + + void execute(GPUDynInstPtr gpuDynInst) override; + }; + + class Inst_VOP3P__V_ACCVGPR_WRITE : public Inst_VOP3P__1OP + { + public: + Inst_VOP3P__V_ACCVGPR_WRITE(InFmt_VOP3P *iFmt) + : Inst_VOP3P__1OP(iFmt, "v_accvgpr_write") + { } + + 
void execute(GPUDynInstPtr gpuDynInst) override; + }; +} // namespace VegaISA +} // namespace gem5 + +#endif // __ARCH_VEGA_INSTS_VOP3P_HH__ diff --git a/src/arch/amdgpu/gcn3/gpu_types.hh b/src/arch/amdgpu/vega/insts/vop3p_mai.cc similarity index 61% rename from src/arch/amdgpu/gcn3/gpu_types.hh rename to src/arch/amdgpu/vega/insts/vop3p_mai.cc index 4cb862de48..d9bf0dd516 100644 --- a/src/arch/amdgpu/gcn3/gpu_types.hh +++ b/src/arch/amdgpu/vega/insts/vop3p_mai.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,38 +29,13 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef __ARCH_GCN3_GPU_TYPES_HH__ -#define __ARCH_GCN3_GPU_TYPES_HH__ - -#include +#include "arch/amdgpu/vega/insts/instructions.hh" +#include "arch/amdgpu/vega/insts/vop3p.hh" namespace gem5 { -namespace Gcn3ISA +namespace VegaISA { - union InstFormat; - - /** - * used to represnt a GPU inst in its raw format. GCN3 - * instructions may be 32b or 64b, therefore we represent - * a raw inst with 64b to ensure that all of its inst data, - * including potential immediate values, may be represented - * in the worst case. - */ - typedef uint64_t RawMachInst; - - /** - * used to represent the encoding of a GCN3 inst. each portion - * of a GCN3 inst must be 1 DWORD (32b), so we use a pointer - * to InstFormat type (which is 32b). for the case in which we - * need multiple DWORDS to represnt a single inst, this pointer - * essentialy acts as an array of the DWORDs needed to represent - * the entire inst encoding. 
- */ - typedef InstFormat *MachInst; - -} // namespace Gcn3ISA +} // namespace VegaISA } // namespace gem5 - -#endif // __ARCH_GCN3_GPU_TYPES_HH__ diff --git a/src/arch/amdgpu/vega/insts/vopc.cc b/src/arch/amdgpu/vega/insts/vopc.cc new file mode 100644 index 0000000000..2c386fec74 --- /dev/null +++ b/src/arch/amdgpu/vega/insts/vopc.cc @@ -0,0 +1,6590 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "arch/amdgpu/vega/insts/instructions.hh" + +namespace gem5 +{ + +namespace VegaISA +{ + // --- Inst_VOPC__V_CMP_CLASS_F32 class methods --- + + Inst_VOPC__V_CMP_CLASS_F32::Inst_VOPC__V_CMP_CLASS_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_class_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_CLASS_F32 + + Inst_VOPC__V_CMP_CLASS_F32::~Inst_VOPC__V_CMP_CLASS_F32() + { + } // ~Inst_VOPC__V_CMP_CLASS_F32 + + // --- description from .arch file --- + // VCC = IEEE numeric class function specified in S1.u, performed on S0.f + // The function reports true if the floating point value is *any* of the + // --- numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. 
+ void + Inst_VOPC__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (bits(src1[lane], 0) || bits(src1[lane], 1)) { + // is NaN + if (std::isnan(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 2)) { + // is -infinity + if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 3)) { + // is -normal + if (std::isnormal(src0[lane]) + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 4)) { + // is -denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 5)) { + // is -zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 6)) { + // is +zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 7)) { + // is +denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 8)) { + // is +normal + if (std::isnormal(src0[lane]) + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 9)) { + // is +infinity + if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_CLASS_F32 class methods --- + + Inst_VOPC__V_CMPX_CLASS_F32::Inst_VOPC__V_CMPX_CLASS_F32(InFmt_VOPC 
*iFmt) + : Inst_VOPC(iFmt, "v_cmpx_class_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_CLASS_F32 + + Inst_VOPC__V_CMPX_CLASS_F32::~Inst_VOPC__V_CMPX_CLASS_F32() + { + } // ~Inst_VOPC__V_CMPX_CLASS_F32 + + // --- description from .arch file --- + // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on + // S0.f The function reports true if the floating point value is *any* of + // the numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. + void + Inst_VOPC__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (bits(src1[lane], 0) || bits(src1[lane], 1)) { + // is NaN + if (std::isnan(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 2)) { + // is -infinity + if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 3)) { + // is -normal + if (std::isnormal(src0[lane]) + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 4)) { + // is -denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if 
(bits(src1[lane], 5)) { + // is -zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 6)) { + // is +zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 7)) { + // is +denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 8)) { + // is +normal + if (std::isnormal(src0[lane]) + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 9)) { + // is +infinity + if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMP_CLASS_F64 class methods --- + + Inst_VOPC__V_CMP_CLASS_F64::Inst_VOPC__V_CMP_CLASS_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_class_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_CLASS_F64 + + Inst_VOPC__V_CMP_CLASS_F64::~Inst_VOPC__V_CMP_CLASS_F64() + { + } // ~Inst_VOPC__V_CMP_CLASS_F64 + + // --- description from .arch file --- + // VCC = IEEE numeric class function specified in S1.u, performed on S0.d + // The function reports true if the floating point value is *any* of the + // --- numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. 
+ void + Inst_VOPC__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (bits(src1[lane], 0) || bits(src1[lane], 1)) { + // is NaN + if (std::isnan(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 2)) { + // is -infinity + if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 3)) { + // is -normal + if (std::isnormal(src0[lane]) + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 4)) { + // is -denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 5)) { + // is -zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 6)) { + // is +zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 7)) { + // is +denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 8)) { + // is +normal + if (std::isnormal(src0[lane]) + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 9)) { + // is +infinity + if (std::isinf(src0[lane]) + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_CLASS_F64 class methods --- + + Inst_VOPC__V_CMPX_CLASS_F64::Inst_VOPC__V_CMPX_CLASS_F64(InFmt_VOPC 
*iFmt) + : Inst_VOPC(iFmt, "v_cmpx_class_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_CLASS_F64 + + Inst_VOPC__V_CMPX_CLASS_F64::~Inst_VOPC__V_CMPX_CLASS_F64() + { + } // ~Inst_VOPC__V_CMPX_CLASS_F64 + + // --- description from .arch file --- + // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on + // S0.d The function reports true if the floating point value is *any* of + // the numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. + void + Inst_VOPC__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + if (bits(src1[lane], 0) || bits(src1[lane], 1)) { + // is NaN + if (std::isnan(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 2)) { + // is -infinity + if (std::isinf(src0[lane]) && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 3)) { + // is -normal + if (std::isnormal(src0[lane]) + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 4)) { + // is -denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if 
(bits(src1[lane], 5)) { + // is -zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 6)) { + // is +zero + if (std::fpclassify(src0[lane]) == FP_ZERO + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 7)) { + // is +denormal + if (std::fpclassify(src0[lane]) == FP_SUBNORMAL + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 8)) { + // is +normal + if (std::isnormal(src0[lane]) + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + if (bits(src1[lane], 9)) { + // is +infinity + if (std::isinf(src0[lane]) + && !std::signbit(src0[lane])) { + vcc.setBit(lane, 1); + continue; + } + } + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMP_CLASS_F16 class methods --- + + Inst_VOPC__V_CMP_CLASS_F16::Inst_VOPC__V_CMP_CLASS_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_class_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_CLASS_F16 + + Inst_VOPC__V_CMP_CLASS_F16::~Inst_VOPC__V_CMP_CLASS_F16() + { + } // ~Inst_VOPC__V_CMP_CLASS_F16 + + // --- description from .arch file --- + // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16 + // The function reports true if the floating point value is *any* of the + // --- numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. 
+ void + Inst_VOPC__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_CLASS_F16 class methods --- + + Inst_VOPC__V_CMPX_CLASS_F16::Inst_VOPC__V_CMPX_CLASS_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_class_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_CLASS_F16 + + Inst_VOPC__V_CMPX_CLASS_F16::~Inst_VOPC__V_CMPX_CLASS_F16() + { + } // ~Inst_VOPC__V_CMPX_CLASS_F16 + + // --- description from .arch file --- + // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on + // --- S0.f16 + // The function reports true if the floating point value is *any* of the + // --- numeric types selected in S1.u according to the following list: + // S1.u[0] -- value is a signaling NaN. + // S1.u[1] -- value is a quiet NaN. + // S1.u[2] -- value is negative infinity. + // S1.u[3] -- value is a negative normal value. + // S1.u[4] -- value is a negative denormal value. + // S1.u[5] -- value is negative zero. + // S1.u[6] -- value is positive zero. + // S1.u[7] -- value is a positive denormal value. + // S1.u[8] -- value is a positive normal value. + // S1.u[9] -- value is positive infinity. + void + Inst_VOPC__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_F_F16 class methods --- + + Inst_VOPC__V_CMP_F_F16::Inst_VOPC__V_CMP_F_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_F_F16 + + Inst_VOPC__V_CMP_F_F16::~Inst_VOPC__V_CMP_F_F16() + { + } // ~Inst_VOPC__V_CMP_F_F16 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_LT_F16 class methods --- + + Inst_VOPC__V_CMP_LT_F16::Inst_VOPC__V_CMP_LT_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_LT_F16 + + Inst_VOPC__V_CMP_LT_F16::~Inst_VOPC__V_CMP_LT_F16() + { + } // ~Inst_VOPC__V_CMP_LT_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_F16 class methods --- + + Inst_VOPC__V_CMP_EQ_F16::Inst_VOPC__V_CMP_EQ_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_EQ_F16 + + Inst_VOPC__V_CMP_EQ_F16::~Inst_VOPC__V_CMP_EQ_F16() + { + } // ~Inst_VOPC__V_CMP_EQ_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_LE_F16 class methods --- + + Inst_VOPC__V_CMP_LE_F16::Inst_VOPC__V_CMP_LE_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_LE_F16 + + Inst_VOPC__V_CMP_LE_F16::~Inst_VOPC__V_CMP_LE_F16() + { + } // ~Inst_VOPC__V_CMP_LE_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_GT_F16 class methods --- + + Inst_VOPC__V_CMP_GT_F16::Inst_VOPC__V_CMP_GT_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_GT_F16 + + Inst_VOPC__V_CMP_GT_F16::~Inst_VOPC__V_CMP_GT_F16() + { + } // ~Inst_VOPC__V_CMP_GT_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_LG_F16 class methods --- + + Inst_VOPC__V_CMP_LG_F16::Inst_VOPC__V_CMP_LG_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lg_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_LG_F16 + + Inst_VOPC__V_CMP_LG_F16::~Inst_VOPC__V_CMP_LG_F16() + { + } // ~Inst_VOPC__V_CMP_LG_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_GE_F16 class methods --- + + Inst_VOPC__V_CMP_GE_F16::Inst_VOPC__V_CMP_GE_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_GE_F16 + + Inst_VOPC__V_CMP_GE_F16::~Inst_VOPC__V_CMP_GE_F16() + { + } // ~Inst_VOPC__V_CMP_GE_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_O_F16 class methods --- + + Inst_VOPC__V_CMP_O_F16::Inst_VOPC__V_CMP_O_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_o_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_O_F16 + + Inst_VOPC__V_CMP_O_F16::~Inst_VOPC__V_CMP_O_F16() + { + } // ~Inst_VOPC__V_CMP_O_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_U_F16 class methods --- + + Inst_VOPC__V_CMP_U_F16::Inst_VOPC__V_CMP_U_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_u_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_U_F16 + + Inst_VOPC__V_CMP_U_F16::~Inst_VOPC__V_CMP_U_F16() + { + } // ~Inst_VOPC__V_CMP_U_F16 + + // --- description from .arch file --- + // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_NGE_F16 class methods --- + + Inst_VOPC__V_CMP_NGE_F16::Inst_VOPC__V_CMP_NGE_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nge_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_NGE_F16 + + Inst_VOPC__V_CMP_NGE_F16::~Inst_VOPC__V_CMP_NGE_F16() + { + } // ~Inst_VOPC__V_CMP_NGE_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_NLG_F16 class methods --- + + Inst_VOPC__V_CMP_NLG_F16::Inst_VOPC__V_CMP_NLG_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nlg_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_NLG_F16 + + Inst_VOPC__V_CMP_NLG_F16::~Inst_VOPC__V_CMP_NLG_F16() + { + } // ~Inst_VOPC__V_CMP_NLG_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_NGT_F16 class methods --- + + Inst_VOPC__V_CMP_NGT_F16::Inst_VOPC__V_CMP_NGT_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ngt_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_NGT_F16 + + Inst_VOPC__V_CMP_NGT_F16::~Inst_VOPC__V_CMP_NGT_F16() + { + } // ~Inst_VOPC__V_CMP_NGT_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_NLE_F16 class methods --- + + Inst_VOPC__V_CMP_NLE_F16::Inst_VOPC__V_CMP_NLE_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nle_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_NLE_F16 + + Inst_VOPC__V_CMP_NLE_F16::~Inst_VOPC__V_CMP_NLE_F16() + { + } // ~Inst_VOPC__V_CMP_NLE_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_NEQ_F16 class methods --- + + Inst_VOPC__V_CMP_NEQ_F16::Inst_VOPC__V_CMP_NEQ_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_neq_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_NEQ_F16 + + Inst_VOPC__V_CMP_NEQ_F16::~Inst_VOPC__V_CMP_NEQ_F16() + { + } // ~Inst_VOPC__V_CMP_NEQ_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_NLT_F16 class methods --- + + Inst_VOPC__V_CMP_NLT_F16::Inst_VOPC__V_CMP_NLT_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nlt_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_NLT_F16 + + Inst_VOPC__V_CMP_NLT_F16::~Inst_VOPC__V_CMP_NLT_F16() + { + } // ~Inst_VOPC__V_CMP_NLT_F16 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_TRU_F16 class methods --- + + Inst_VOPC__V_CMP_TRU_F16::Inst_VOPC__V_CMP_TRU_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_tru_f16") + { + setFlag(ALU); + setFlag(F16); + } // Inst_VOPC__V_CMP_TRU_F16 + + Inst_VOPC__V_CMP_TRU_F16::~Inst_VOPC__V_CMP_TRU_F16() + { + } // ~Inst_VOPC__V_CMP_TRU_F16 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_F_F16 class methods --- + + Inst_VOPC__V_CMPX_F_F16::Inst_VOPC__V_CMPX_F_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_F16 + + Inst_VOPC__V_CMPX_F_F16::~Inst_VOPC__V_CMPX_F_F16() + { + } // ~Inst_VOPC__V_CMPX_F_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_F16 class methods --- + + Inst_VOPC__V_CMPX_LT_F16::Inst_VOPC__V_CMPX_LT_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_F16 + + Inst_VOPC__V_CMPX_LT_F16::~Inst_VOPC__V_CMPX_LT_F16() + { + } // ~Inst_VOPC__V_CMPX_LT_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_F16 class methods --- + + Inst_VOPC__V_CMPX_EQ_F16::Inst_VOPC__V_CMPX_EQ_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_F16 + + Inst_VOPC__V_CMPX_EQ_F16::~Inst_VOPC__V_CMPX_EQ_F16() + { + } // ~Inst_VOPC__V_CMPX_EQ_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_F16 class methods --- + + Inst_VOPC__V_CMPX_LE_F16::Inst_VOPC__V_CMPX_LE_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_F16 + + Inst_VOPC__V_CMPX_LE_F16::~Inst_VOPC__V_CMPX_LE_F16() + { + } // ~Inst_VOPC__V_CMPX_LE_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_F16 class methods --- + + Inst_VOPC__V_CMPX_GT_F16::Inst_VOPC__V_CMPX_GT_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_F16 + + Inst_VOPC__V_CMPX_GT_F16::~Inst_VOPC__V_CMPX_GT_F16() + { + } // ~Inst_VOPC__V_CMPX_GT_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_LG_F16 class methods --- + + Inst_VOPC__V_CMPX_LG_F16::Inst_VOPC__V_CMPX_LG_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lg_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LG_F16 + + Inst_VOPC__V_CMPX_LG_F16::~Inst_VOPC__V_CMPX_LG_F16() + { + } // ~Inst_VOPC__V_CMPX_LG_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_F16 class methods --- + + Inst_VOPC__V_CMPX_GE_F16::Inst_VOPC__V_CMPX_GE_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_F16 + + Inst_VOPC__V_CMPX_GE_F16::~Inst_VOPC__V_CMPX_GE_F16() + { + } // ~Inst_VOPC__V_CMPX_GE_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_O_F16 class methods --- + + Inst_VOPC__V_CMPX_O_F16::Inst_VOPC__V_CMPX_O_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_o_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_O_F16 + + Inst_VOPC__V_CMPX_O_F16::~Inst_VOPC__V_CMPX_O_F16() + { + } // ~Inst_VOPC__V_CMPX_O_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC + // encoding. + void + Inst_VOPC__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_U_F16 class methods --- + + Inst_VOPC__V_CMPX_U_F16::Inst_VOPC__V_CMPX_U_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_u_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_U_F16 + + Inst_VOPC__V_CMPX_U_F16::~Inst_VOPC__V_CMPX_U_F16() + { + } // ~Inst_VOPC__V_CMPX_U_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC + // encoding. 
+ void + Inst_VOPC__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_NGE_F16 class methods --- + + Inst_VOPC__V_CMPX_NGE_F16::Inst_VOPC__V_CMPX_NGE_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nge_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NGE_F16 + + Inst_VOPC__V_CMPX_NGE_F16::~Inst_VOPC__V_CMPX_NGE_F16() + { + } // ~Inst_VOPC__V_CMPX_NGE_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_NLG_F16 class methods --- + + Inst_VOPC__V_CMPX_NLG_F16::Inst_VOPC__V_CMPX_NLG_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nlg_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLG_F16 + + Inst_VOPC__V_CMPX_NLG_F16::~Inst_VOPC__V_CMPX_NLG_F16() + { + } // ~Inst_VOPC__V_CMPX_NLG_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_NGT_F16 class methods --- + + Inst_VOPC__V_CMPX_NGT_F16::Inst_VOPC__V_CMPX_NGT_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ngt_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NGT_F16 + + Inst_VOPC__V_CMPX_NGT_F16::~Inst_VOPC__V_CMPX_NGT_F16() + { + } // ~Inst_VOPC__V_CMPX_NGT_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
    // Unimplemented: panics if decoded and executed.
    void
    Inst_VOPC__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOPC__V_CMPX_NLE_F16 class methods ---

    Inst_VOPC__V_CMPX_NLE_F16::Inst_VOPC__V_CMPX_NLE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nle_f16")
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(WritesEXEC); // CMPX variants write the EXEC mask.
    } // Inst_VOPC__V_CMPX_NLE_F16

    Inst_VOPC__V_CMPX_NLE_F16::~Inst_VOPC__V_CMPX_NLE_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NLE_F16

    // --- description from .arch file ---
    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    // Unimplemented: panics if decoded and executed.
    void
    Inst_VOPC__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOPC__V_CMPX_NEQ_F16 class methods ---

    Inst_VOPC__V_CMPX_NEQ_F16::Inst_VOPC__V_CMPX_NEQ_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_neq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(WritesEXEC); // CMPX variants write the EXEC mask.
    } // Inst_VOPC__V_CMPX_NEQ_F16

    Inst_VOPC__V_CMPX_NEQ_F16::~Inst_VOPC__V_CMPX_NEQ_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NEQ_F16

    // --- description from .arch file ---
    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    // Unimplemented: panics if decoded and executed.
    void
    Inst_VOPC__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    // --- Inst_VOPC__V_CMPX_NLT_F16 class methods ---

    Inst_VOPC__V_CMPX_NLT_F16::Inst_VOPC__V_CMPX_NLT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(WritesEXEC); // CMPX variants write the EXEC mask.
    } // Inst_VOPC__V_CMPX_NLT_F16

    Inst_VOPC__V_CMPX_NLT_F16::~Inst_VOPC__V_CMPX_NLT_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NLT_F16

    // --- description from .arch file ---
    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
+ void + Inst_VOPC__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMPX_TRU_F16 class methods --- + + Inst_VOPC__V_CMPX_TRU_F16::Inst_VOPC__V_CMPX_TRU_F16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_tru_f16") + { + setFlag(ALU); + setFlag(F16); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_TRU_F16 + + Inst_VOPC__V_CMPX_TRU_F16::~Inst_VOPC__V_CMPX_TRU_F16() + { + } // ~Inst_VOPC__V_CMPX_TRU_F16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst) + { + panicUnimplemented(); + } // execute + // --- Inst_VOPC__V_CMP_F_F32 class methods --- + + Inst_VOPC__V_CMP_F_F32::Inst_VOPC__V_CMP_F_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_F_F32 + + Inst_VOPC__V_CMP_F_F32::~Inst_VOPC__V_CMP_F_F32() + { + } // ~Inst_VOPC__V_CMP_F_F32 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_F32 class methods --- + + Inst_VOPC__V_CMP_LT_F32::Inst_VOPC__V_CMP_LT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_LT_F32 + + Inst_VOPC__V_CMP_LT_F32::~Inst_VOPC__V_CMP_LT_F32() + { + } // ~Inst_VOPC__V_CMP_LT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_F32 class methods --- + + Inst_VOPC__V_CMP_EQ_F32::Inst_VOPC__V_CMP_EQ_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_EQ_F32 + + Inst_VOPC__V_CMP_EQ_F32::~Inst_VOPC__V_CMP_EQ_F32() + { + } // ~Inst_VOPC__V_CMP_EQ_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_F32 class methods --- + + Inst_VOPC__V_CMP_LE_F32::Inst_VOPC__V_CMP_LE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_LE_F32 + + Inst_VOPC__V_CMP_LE_F32::~Inst_VOPC__V_CMP_LE_F32() + { + } // ~Inst_VOPC__V_CMP_LE_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_F32 class methods --- + + Inst_VOPC__V_CMP_GT_F32::Inst_VOPC__V_CMP_GT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_GT_F32 + + Inst_VOPC__V_CMP_GT_F32::~Inst_VOPC__V_CMP_GT_F32() + { + } // ~Inst_VOPC__V_CMP_GT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LG_F32 class methods --- + + Inst_VOPC__V_CMP_LG_F32::Inst_VOPC__V_CMP_LG_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lg_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_LG_F32 + + Inst_VOPC__V_CMP_LG_F32::~Inst_VOPC__V_CMP_LG_F32() + { + } // ~Inst_VOPC__V_CMP_LG_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_F32 class methods --- + + Inst_VOPC__V_CMP_GE_F32::Inst_VOPC__V_CMP_GE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_GE_F32 + + Inst_VOPC__V_CMP_GE_F32::~Inst_VOPC__V_CMP_GE_F32() + { + } // ~Inst_VOPC__V_CMP_GE_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_O_F32 class methods --- + + Inst_VOPC__V_CMP_O_F32::Inst_VOPC__V_CMP_O_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_o_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_O_F32 + + Inst_VOPC__V_CMP_O_F32::~Inst_VOPC__V_CMP_O_F32() + { + } // ~Inst_VOPC__V_CMP_O_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_U_F32 class methods --- + + Inst_VOPC__V_CMP_U_F32::Inst_VOPC__V_CMP_U_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_u_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_U_F32 + + Inst_VOPC__V_CMP_U_F32::~Inst_VOPC__V_CMP_U_F32() + { + } // ~Inst_VOPC__V_CMP_U_F32 + + // --- description from .arch file --- + // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NGE_F32 class methods --- + + Inst_VOPC__V_CMP_NGE_F32::Inst_VOPC__V_CMP_NGE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nge_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_NGE_F32 + + Inst_VOPC__V_CMP_NGE_F32::~Inst_VOPC__V_CMP_NGE_F32() + { + } // ~Inst_VOPC__V_CMP_NGE_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NLG_F32 class methods --- + + Inst_VOPC__V_CMP_NLG_F32::Inst_VOPC__V_CMP_NLG_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nlg_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_NLG_F32 + + Inst_VOPC__V_CMP_NLG_F32::~Inst_VOPC__V_CMP_NLG_F32() + { + } // ~Inst_VOPC__V_CMP_NLG_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NGT_F32 class methods --- + + Inst_VOPC__V_CMP_NGT_F32::Inst_VOPC__V_CMP_NGT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ngt_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_NGT_F32 + + Inst_VOPC__V_CMP_NGT_F32::~Inst_VOPC__V_CMP_NGT_F32() + { + } // ~Inst_VOPC__V_CMP_NGT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NLE_F32 class methods --- + + Inst_VOPC__V_CMP_NLE_F32::Inst_VOPC__V_CMP_NLE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nle_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_NLE_F32 + + Inst_VOPC__V_CMP_NLE_F32::~Inst_VOPC__V_CMP_NLE_F32() + { + } // ~Inst_VOPC__V_CMP_NLE_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NEQ_F32 class methods --- + + Inst_VOPC__V_CMP_NEQ_F32::Inst_VOPC__V_CMP_NEQ_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_neq_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_NEQ_F32 + + Inst_VOPC__V_CMP_NEQ_F32::~Inst_VOPC__V_CMP_NEQ_F32() + { + } // ~Inst_VOPC__V_CMP_NEQ_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NLT_F32 class methods --- + + Inst_VOPC__V_CMP_NLT_F32::Inst_VOPC__V_CMP_NLT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nlt_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_NLT_F32 + + Inst_VOPC__V_CMP_NLT_F32::~Inst_VOPC__V_CMP_NLT_F32() + { + } // ~Inst_VOPC__V_CMP_NLT_F32 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_TRU_F32 class methods --- + + Inst_VOPC__V_CMP_TRU_F32::Inst_VOPC__V_CMP_TRU_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_tru_f32") + { + setFlag(ALU); + setFlag(F32); + } // Inst_VOPC__V_CMP_TRU_F32 + + Inst_VOPC__V_CMP_TRU_F32::~Inst_VOPC__V_CMP_TRU_F32() + { + } // ~Inst_VOPC__V_CMP_TRU_F32 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_F32 class methods --- + + Inst_VOPC__V_CMPX_F_F32::Inst_VOPC__V_CMPX_F_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_F32 + + Inst_VOPC__V_CMPX_F_F32::~Inst_VOPC__V_CMPX_F_F32() + { + } // ~Inst_VOPC__V_CMPX_F_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_F32 class methods --- + + Inst_VOPC__V_CMPX_LT_F32::Inst_VOPC__V_CMPX_LT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_F32 + + Inst_VOPC__V_CMPX_LT_F32::~Inst_VOPC__V_CMPX_LT_F32() + { + } // ~Inst_VOPC__V_CMPX_LT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_F32 class methods --- + + Inst_VOPC__V_CMPX_EQ_F32::Inst_VOPC__V_CMPX_EQ_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_F32 + + Inst_VOPC__V_CMPX_EQ_F32::~Inst_VOPC__V_CMPX_EQ_F32() + { + } // ~Inst_VOPC__V_CMPX_EQ_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_F32 class methods --- + + Inst_VOPC__V_CMPX_LE_F32::Inst_VOPC__V_CMPX_LE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_F32 + + Inst_VOPC__V_CMPX_LE_F32::~Inst_VOPC__V_CMPX_LE_F32() + { + } // ~Inst_VOPC__V_CMPX_LE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_F32 class methods --- + + Inst_VOPC__V_CMPX_GT_F32::Inst_VOPC__V_CMPX_GT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_F32 + + Inst_VOPC__V_CMPX_GT_F32::~Inst_VOPC__V_CMPX_GT_F32() + { + } // ~Inst_VOPC__V_CMPX_GT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_LG_F32 class methods --- + + Inst_VOPC__V_CMPX_LG_F32::Inst_VOPC__V_CMPX_LG_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lg_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LG_F32 + + Inst_VOPC__V_CMPX_LG_F32::~Inst_VOPC__V_CMPX_LG_F32() + { + } // ~Inst_VOPC__V_CMPX_LG_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_F32 class methods --- + + Inst_VOPC__V_CMPX_GE_F32::Inst_VOPC__V_CMPX_GE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_F32 + + Inst_VOPC__V_CMPX_GE_F32::~Inst_VOPC__V_CMPX_GE_F32() + { + } // ~Inst_VOPC__V_CMPX_GE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_O_F32 class methods --- + + Inst_VOPC__V_CMPX_O_F32::Inst_VOPC__V_CMPX_O_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_o_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_O_F32 + + Inst_VOPC__V_CMPX_O_F32::~Inst_VOPC__V_CMPX_O_F32() + { + } // ~Inst_VOPC__V_CMPX_O_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC + // encoding. + void + Inst_VOPC__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_U_F32 class methods --- + + Inst_VOPC__V_CMPX_U_F32::Inst_VOPC__V_CMPX_U_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_u_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_U_F32 + + Inst_VOPC__V_CMPX_U_F32::~Inst_VOPC__V_CMPX_U_F32() + { + } // ~Inst_VOPC__V_CMPX_U_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC + // encoding. 
+ void + Inst_VOPC__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_NGE_F32 class methods --- + + Inst_VOPC__V_CMPX_NGE_F32::Inst_VOPC__V_CMPX_NGE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nge_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NGE_F32 + + Inst_VOPC__V_CMPX_NGE_F32::~Inst_VOPC__V_CMPX_NGE_F32() + { + } // ~Inst_VOPC__V_CMPX_NGE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_NLG_F32 class methods --- + + Inst_VOPC__V_CMPX_NLG_F32::Inst_VOPC__V_CMPX_NLG_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nlg_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLG_F32 + + Inst_VOPC__V_CMPX_NLG_F32::~Inst_VOPC__V_CMPX_NLG_F32() + { + } // ~Inst_VOPC__V_CMPX_NLG_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_NGT_F32 class methods --- + + Inst_VOPC__V_CMPX_NGT_F32::Inst_VOPC__V_CMPX_NGT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ngt_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NGT_F32 + + Inst_VOPC__V_CMPX_NGT_F32::~Inst_VOPC__V_CMPX_NGT_F32() + { + } // ~Inst_VOPC__V_CMPX_NGT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_NLE_F32 class methods --- + + Inst_VOPC__V_CMPX_NLE_F32::Inst_VOPC__V_CMPX_NLE_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nle_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLE_F32 + + Inst_VOPC__V_CMPX_NLE_F32::~Inst_VOPC__V_CMPX_NLE_F32() + { + } // ~Inst_VOPC__V_CMPX_NLE_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_NEQ_F32 class methods --- + + Inst_VOPC__V_CMPX_NEQ_F32::Inst_VOPC__V_CMPX_NEQ_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_neq_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NEQ_F32 + + Inst_VOPC__V_CMPX_NEQ_F32::~Inst_VOPC__V_CMPX_NEQ_F32() + { + } // ~Inst_VOPC__V_CMPX_NEQ_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] == src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_NLT_F32 class methods --- + + Inst_VOPC__V_CMPX_NLT_F32::Inst_VOPC__V_CMPX_NLT_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nlt_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLT_F32 + + Inst_VOPC__V_CMPX_NLT_F32::~Inst_VOPC__V_CMPX_NLT_F32() + { + } // ~Inst_VOPC__V_CMPX_NLT_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_TRU_F32 class methods --- + + Inst_VOPC__V_CMPX_TRU_F32::Inst_VOPC__V_CMPX_TRU_F32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_tru_f32") + { + setFlag(ALU); + setFlag(F32); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_TRU_F32 + + Inst_VOPC__V_CMPX_TRU_F32::~Inst_VOPC__V_CMPX_TRU_F32() + { + } // ~Inst_VOPC__V_CMPX_TRU_F32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMP_F_F64 class methods --- + + Inst_VOPC__V_CMP_F_F64::Inst_VOPC__V_CMP_F_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_F_F64 + + Inst_VOPC__V_CMP_F_F64::~Inst_VOPC__V_CMP_F_F64() + { + } // ~Inst_VOPC__V_CMP_F_F64 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_F64 class methods --- + + Inst_VOPC__V_CMP_LT_F64::Inst_VOPC__V_CMP_LT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_LT_F64 + + Inst_VOPC__V_CMP_LT_F64::~Inst_VOPC__V_CMP_LT_F64() + { + } // ~Inst_VOPC__V_CMP_LT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_F64 class methods --- + + Inst_VOPC__V_CMP_EQ_F64::Inst_VOPC__V_CMP_EQ_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_EQ_F64 + + Inst_VOPC__V_CMP_EQ_F64::~Inst_VOPC__V_CMP_EQ_F64() + { + } // ~Inst_VOPC__V_CMP_EQ_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_F64 class methods --- + + Inst_VOPC__V_CMP_LE_F64::Inst_VOPC__V_CMP_LE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_LE_F64 + + Inst_VOPC__V_CMP_LE_F64::~Inst_VOPC__V_CMP_LE_F64() + { + } // ~Inst_VOPC__V_CMP_LE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_F64 class methods --- + + Inst_VOPC__V_CMP_GT_F64::Inst_VOPC__V_CMP_GT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_GT_F64 + + Inst_VOPC__V_CMP_GT_F64::~Inst_VOPC__V_CMP_GT_F64() + { + } // ~Inst_VOPC__V_CMP_GT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LG_F64 class methods --- + + Inst_VOPC__V_CMP_LG_F64::Inst_VOPC__V_CMP_LG_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lg_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_LG_F64 + + Inst_VOPC__V_CMP_LG_F64::~Inst_VOPC__V_CMP_LG_F64() + { + } // ~Inst_VOPC__V_CMP_LG_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_F64 class methods --- + + Inst_VOPC__V_CMP_GE_F64::Inst_VOPC__V_CMP_GE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_GE_F64 + + Inst_VOPC__V_CMP_GE_F64::~Inst_VOPC__V_CMP_GE_F64() + { + } // ~Inst_VOPC__V_CMP_GE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_O_F64 class methods --- + + Inst_VOPC__V_CMP_O_F64::Inst_VOPC__V_CMP_O_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_o_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_O_F64 + + Inst_VOPC__V_CMP_O_F64::~Inst_VOPC__V_CMP_O_F64() + { + } // ~Inst_VOPC__V_CMP_O_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_U_F64 class methods --- + + Inst_VOPC__V_CMP_U_F64::Inst_VOPC__V_CMP_U_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_u_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_U_F64 + + Inst_VOPC__V_CMP_U_F64::~Inst_VOPC__V_CMP_U_F64() + { + } // ~Inst_VOPC__V_CMP_U_F64 + + // --- description from .arch file --- + // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NGE_F64 class methods --- + + Inst_VOPC__V_CMP_NGE_F64::Inst_VOPC__V_CMP_NGE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nge_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_NGE_F64 + + Inst_VOPC__V_CMP_NGE_F64::~Inst_VOPC__V_CMP_NGE_F64() + { + } // ~Inst_VOPC__V_CMP_NGE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NLG_F64 class methods --- + + Inst_VOPC__V_CMP_NLG_F64::Inst_VOPC__V_CMP_NLG_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nlg_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_NLG_F64 + + Inst_VOPC__V_CMP_NLG_F64::~Inst_VOPC__V_CMP_NLG_F64() + { + } // ~Inst_VOPC__V_CMP_NLG_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NGT_F64 class methods --- + + Inst_VOPC__V_CMP_NGT_F64::Inst_VOPC__V_CMP_NGT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ngt_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_NGT_F64 + + Inst_VOPC__V_CMP_NGT_F64::~Inst_VOPC__V_CMP_NGT_F64() + { + } // ~Inst_VOPC__V_CMP_NGT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NLE_F64 class methods --- + + Inst_VOPC__V_CMP_NLE_F64::Inst_VOPC__V_CMP_NLE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nle_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_NLE_F64 + + Inst_VOPC__V_CMP_NLE_F64::~Inst_VOPC__V_CMP_NLE_F64() + { + } // ~Inst_VOPC__V_CMP_NLE_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NEQ_F64 class methods --- + + Inst_VOPC__V_CMP_NEQ_F64::Inst_VOPC__V_CMP_NEQ_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_neq_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_NEQ_F64 + + Inst_VOPC__V_CMP_NEQ_F64::~Inst_VOPC__V_CMP_NEQ_F64() + { + } // ~Inst_VOPC__V_CMP_NEQ_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NLT_F64 class methods --- + + Inst_VOPC__V_CMP_NLT_F64::Inst_VOPC__V_CMP_NLT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_nlt_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_NLT_F64 + + Inst_VOPC__V_CMP_NLT_F64::~Inst_VOPC__V_CMP_NLT_F64() + { + } // ~Inst_VOPC__V_CMP_NLT_F64 + + // --- description from .arch file --- + // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_TRU_F64 class methods --- + + Inst_VOPC__V_CMP_TRU_F64::Inst_VOPC__V_CMP_TRU_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_tru_f64") + { + setFlag(ALU); + setFlag(F64); + } // Inst_VOPC__V_CMP_TRU_F64 + + Inst_VOPC__V_CMP_TRU_F64::~Inst_VOPC__V_CMP_TRU_F64() + { + } // ~Inst_VOPC__V_CMP_TRU_F64 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_F64 class methods --- + + Inst_VOPC__V_CMPX_F_F64::Inst_VOPC__V_CMPX_F_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_F64 + + Inst_VOPC__V_CMPX_F_F64::~Inst_VOPC__V_CMPX_F_F64() + { + } // ~Inst_VOPC__V_CMPX_F_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_F64 class methods --- + + Inst_VOPC__V_CMPX_LT_F64::Inst_VOPC__V_CMPX_LT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_F64 + + Inst_VOPC__V_CMPX_LT_F64::~Inst_VOPC__V_CMPX_LT_F64() + { + } // ~Inst_VOPC__V_CMPX_LT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_F64 class methods --- + + Inst_VOPC__V_CMPX_EQ_F64::Inst_VOPC__V_CMPX_EQ_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_F64 + + Inst_VOPC__V_CMPX_EQ_F64::~Inst_VOPC__V_CMPX_EQ_F64() + { + } // ~Inst_VOPC__V_CMPX_EQ_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + wf->execMask() = vcc.rawData(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_F64 class methods --- + + Inst_VOPC__V_CMPX_LE_F64::Inst_VOPC__V_CMPX_LE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_F64 + + Inst_VOPC__V_CMPX_LE_F64::~Inst_VOPC__V_CMPX_LE_F64() + { + } // ~Inst_VOPC__V_CMPX_LE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_F64 class methods --- + + Inst_VOPC__V_CMPX_GT_F64::Inst_VOPC__V_CMPX_GT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_F64 + + Inst_VOPC__V_CMPX_GT_F64::~Inst_VOPC__V_CMPX_GT_F64() + { + } // ~Inst_VOPC__V_CMPX_GT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LG_F64 class methods --- + + Inst_VOPC__V_CMPX_LG_F64::Inst_VOPC__V_CMPX_LG_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lg_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LG_F64 + + Inst_VOPC__V_CMPX_LG_F64::~Inst_VOPC__V_CMPX_LG_F64() + { + } // ~Inst_VOPC__V_CMPX_LG_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_F64 class methods --- + + Inst_VOPC__V_CMPX_GE_F64::Inst_VOPC__V_CMPX_GE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_F64 + + Inst_VOPC__V_CMPX_GE_F64::~Inst_VOPC__V_CMPX_GE_F64() + { + } // ~Inst_VOPC__V_CMPX_GE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_O_F64 class methods --- + + Inst_VOPC__V_CMPX_O_F64::Inst_VOPC__V_CMPX_O_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_o_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_O_F64 + + Inst_VOPC__V_CMPX_O_F64::~Inst_VOPC__V_CMPX_O_F64() + { + } // ~Inst_VOPC__V_CMPX_O_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC + // encoding. + void + Inst_VOPC__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (!std::isnan(src0[lane]) + && !std::isnan(src1[lane])) ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_U_F64 class methods --- + + Inst_VOPC__V_CMPX_U_F64::Inst_VOPC__V_CMPX_U_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_u_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_U_F64 + + Inst_VOPC__V_CMPX_U_F64::~Inst_VOPC__V_CMPX_U_F64() + { + } // ~Inst_VOPC__V_CMPX_U_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC + // encoding. 
+ void + Inst_VOPC__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, (std::isnan(src0[lane]) + || std::isnan(src1[lane])) ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NGE_F64 class methods --- + + Inst_VOPC__V_CMPX_NGE_F64::Inst_VOPC__V_CMPX_NGE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nge_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NGE_F64 + + Inst_VOPC__V_CMPX_NGE_F64::~Inst_VOPC__V_CMPX_NGE_F64() + { + } // ~Inst_VOPC__V_CMPX_NGE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NLG_F64 class methods --- + + Inst_VOPC__V_CMPX_NLG_F64::Inst_VOPC__V_CMPX_NLG_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nlg_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLG_F64 + + Inst_VOPC__V_CMPX_NLG_F64::~Inst_VOPC__V_CMPX_NLG_F64() + { + } // ~Inst_VOPC__V_CMPX_NLG_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane] + || src0[lane] > src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NGT_F64 class methods --- + + Inst_VOPC__V_CMPX_NGT_F64::Inst_VOPC__V_CMPX_NGT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ngt_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NGT_F64 + + Inst_VOPC__V_CMPX_NGT_F64::~Inst_VOPC__V_CMPX_NGT_F64() + { + } // ~Inst_VOPC__V_CMPX_NGT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NLE_F64 class methods --- + + Inst_VOPC__V_CMPX_NLE_F64::Inst_VOPC__V_CMPX_NLE_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nle_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLE_F64 + + Inst_VOPC__V_CMPX_NLE_F64::~Inst_VOPC__V_CMPX_NLE_F64() + { + } // ~Inst_VOPC__V_CMPX_NLE_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NEQ_F64 class methods --- + + Inst_VOPC__V_CMPX_NEQ_F64::Inst_VOPC__V_CMPX_NEQ_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_neq_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NEQ_F64 + + Inst_VOPC__V_CMPX_NEQ_F64::~Inst_VOPC__V_CMPX_NEQ_F64() + { + } // ~Inst_VOPC__V_CMPX_NEQ_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NLT_F64 class methods --- + + Inst_VOPC__V_CMPX_NLT_F64::Inst_VOPC__V_CMPX_NLT_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_nlt_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NLT_F64 + + Inst_VOPC__V_CMPX_NLT_F64::~Inst_VOPC__V_CMPX_NLT_F64() + { + } // ~Inst_VOPC__V_CMPX_NLT_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandF64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_TRU_F64 class methods --- + + Inst_VOPC__V_CMPX_TRU_F64::Inst_VOPC__V_CMPX_TRU_F64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_tru_f64") + { + setFlag(ALU); + setFlag(F64); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_TRU_F64 + + Inst_VOPC__V_CMPX_TRU_F64::~Inst_VOPC__V_CMPX_TRU_F64() + { + } // ~Inst_VOPC__V_CMPX_TRU_F64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_F_I16 class methods --- + + Inst_VOPC__V_CMP_F_I16::Inst_VOPC__V_CMP_F_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_F_I16 + + Inst_VOPC__V_CMP_F_I16::~Inst_VOPC__V_CMP_F_I16() + { + } // ~Inst_VOPC__V_CMP_F_I16 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_I16 class methods --- + + Inst_VOPC__V_CMP_LT_I16::Inst_VOPC__V_CMP_LT_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LT_I16 + + Inst_VOPC__V_CMP_LT_I16::~Inst_VOPC__V_CMP_LT_I16() + { + } // ~Inst_VOPC__V_CMP_LT_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_I16 class methods --- + + Inst_VOPC__V_CMP_EQ_I16::Inst_VOPC__V_CMP_EQ_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_EQ_I16 + + Inst_VOPC__V_CMP_EQ_I16::~Inst_VOPC__V_CMP_EQ_I16() + { + } // ~Inst_VOPC__V_CMP_EQ_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_I16 class methods --- + + Inst_VOPC__V_CMP_LE_I16::Inst_VOPC__V_CMP_LE_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LE_I16 + + Inst_VOPC__V_CMP_LE_I16::~Inst_VOPC__V_CMP_LE_I16() + { + } // ~Inst_VOPC__V_CMP_LE_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_I16 class methods --- + + Inst_VOPC__V_CMP_GT_I16::Inst_VOPC__V_CMP_GT_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GT_I16 + + Inst_VOPC__V_CMP_GT_I16::~Inst_VOPC__V_CMP_GT_I16() + { + } // ~Inst_VOPC__V_CMP_GT_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NE_I16 class methods --- + + Inst_VOPC__V_CMP_NE_I16::Inst_VOPC__V_CMP_NE_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ne_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_NE_I16 + + Inst_VOPC__V_CMP_NE_I16::~Inst_VOPC__V_CMP_NE_I16() + { + } // ~Inst_VOPC__V_CMP_NE_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_I16 class methods --- + + Inst_VOPC__V_CMP_GE_I16::Inst_VOPC__V_CMP_GE_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GE_I16 + + Inst_VOPC__V_CMP_GE_I16::~Inst_VOPC__V_CMP_GE_I16() + { + } // ~Inst_VOPC__V_CMP_GE_I16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_T_I16 class methods --- + + Inst_VOPC__V_CMP_T_I16::Inst_VOPC__V_CMP_T_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_t_i16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_T_I16 + + Inst_VOPC__V_CMP_T_I16::~Inst_VOPC__V_CMP_T_I16() + { + } // ~Inst_VOPC__V_CMP_T_I16 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_F_U16 class methods --- + + Inst_VOPC__V_CMP_F_U16::Inst_VOPC__V_CMP_F_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_F_U16 + + Inst_VOPC__V_CMP_F_U16::~Inst_VOPC__V_CMP_F_U16() + { + } // ~Inst_VOPC__V_CMP_F_U16 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_U16 class methods --- + + Inst_VOPC__V_CMP_LT_U16::Inst_VOPC__V_CMP_LT_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LT_U16 + + Inst_VOPC__V_CMP_LT_U16::~Inst_VOPC__V_CMP_LT_U16() + { + } // ~Inst_VOPC__V_CMP_LT_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_U16 class methods --- + + Inst_VOPC__V_CMP_EQ_U16::Inst_VOPC__V_CMP_EQ_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_EQ_U16 + + Inst_VOPC__V_CMP_EQ_U16::~Inst_VOPC__V_CMP_EQ_U16() + { + } // ~Inst_VOPC__V_CMP_EQ_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_U16 class methods --- + + Inst_VOPC__V_CMP_LE_U16::Inst_VOPC__V_CMP_LE_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LE_U16 + + Inst_VOPC__V_CMP_LE_U16::~Inst_VOPC__V_CMP_LE_U16() + { + } // ~Inst_VOPC__V_CMP_LE_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_U16 class methods --- + + Inst_VOPC__V_CMP_GT_U16::Inst_VOPC__V_CMP_GT_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GT_U16 + + Inst_VOPC__V_CMP_GT_U16::~Inst_VOPC__V_CMP_GT_U16() + { + } // ~Inst_VOPC__V_CMP_GT_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NE_U16 class methods --- + + Inst_VOPC__V_CMP_NE_U16::Inst_VOPC__V_CMP_NE_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ne_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_NE_U16 + + Inst_VOPC__V_CMP_NE_U16::~Inst_VOPC__V_CMP_NE_U16() + { + } // ~Inst_VOPC__V_CMP_NE_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_U16 class methods --- + + Inst_VOPC__V_CMP_GE_U16::Inst_VOPC__V_CMP_GE_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GE_U16 + + Inst_VOPC__V_CMP_GE_U16::~Inst_VOPC__V_CMP_GE_U16() + { + } // ~Inst_VOPC__V_CMP_GE_U16 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_T_U16 class methods --- + + Inst_VOPC__V_CMP_T_U16::Inst_VOPC__V_CMP_T_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_t_u16") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_T_U16 + + Inst_VOPC__V_CMP_T_U16::~Inst_VOPC__V_CMP_T_U16() + { + } // ~Inst_VOPC__V_CMP_T_U16 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_I16 class methods --- + + Inst_VOPC__V_CMPX_F_I16::Inst_VOPC__V_CMPX_F_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_I16 + + Inst_VOPC__V_CMPX_F_I16::~Inst_VOPC__V_CMPX_F_I16() + { + } // ~Inst_VOPC__V_CMPX_F_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_I16 class methods --- + + Inst_VOPC__V_CMPX_LT_I16::Inst_VOPC__V_CMPX_LT_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_I16 + + Inst_VOPC__V_CMPX_LT_I16::~Inst_VOPC__V_CMPX_LT_I16() + { + } // ~Inst_VOPC__V_CMPX_LT_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_I16 class methods --- + + Inst_VOPC__V_CMPX_EQ_I16::Inst_VOPC__V_CMPX_EQ_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_I16 + + Inst_VOPC__V_CMPX_EQ_I16::~Inst_VOPC__V_CMPX_EQ_I16() + { + } // ~Inst_VOPC__V_CMPX_EQ_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_I16 class methods --- + + Inst_VOPC__V_CMPX_LE_I16::Inst_VOPC__V_CMPX_LE_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_I16 + + Inst_VOPC__V_CMPX_LE_I16::~Inst_VOPC__V_CMPX_LE_I16() + { + } // ~Inst_VOPC__V_CMPX_LE_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_I16 class methods --- + + Inst_VOPC__V_CMPX_GT_I16::Inst_VOPC__V_CMPX_GT_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_I16 + + Inst_VOPC__V_CMPX_GT_I16::~Inst_VOPC__V_CMPX_GT_I16() + { + } // ~Inst_VOPC__V_CMPX_GT_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NE_I16 class methods --- + + Inst_VOPC__V_CMPX_NE_I16::Inst_VOPC__V_CMPX_NE_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ne_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NE_I16 + + Inst_VOPC__V_CMPX_NE_I16::~Inst_VOPC__V_CMPX_NE_I16() + { + } // ~Inst_VOPC__V_CMPX_NE_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_I16 class methods --- + + Inst_VOPC__V_CMPX_GE_I16::Inst_VOPC__V_CMPX_GE_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_I16 + + Inst_VOPC__V_CMPX_GE_I16::~Inst_VOPC__V_CMPX_GE_I16() + { + } // ~Inst_VOPC__V_CMPX_GE_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_T_I16 class methods --- + + Inst_VOPC__V_CMPX_T_I16::Inst_VOPC__V_CMPX_T_I16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_t_i16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_T_I16 + + Inst_VOPC__V_CMPX_T_I16::~Inst_VOPC__V_CMPX_T_I16() + { + } // ~Inst_VOPC__V_CMPX_T_I16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_U16 class methods --- + + Inst_VOPC__V_CMPX_F_U16::Inst_VOPC__V_CMPX_F_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_U16 + + Inst_VOPC__V_CMPX_F_U16::~Inst_VOPC__V_CMPX_F_U16() + { + } // ~Inst_VOPC__V_CMPX_F_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_U16 class methods --- + + Inst_VOPC__V_CMPX_LT_U16::Inst_VOPC__V_CMPX_LT_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_U16 + + Inst_VOPC__V_CMPX_LT_U16::~Inst_VOPC__V_CMPX_LT_U16() + { + } // ~Inst_VOPC__V_CMPX_LT_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_U16 class methods --- + + Inst_VOPC__V_CMPX_EQ_U16::Inst_VOPC__V_CMPX_EQ_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_U16 + + Inst_VOPC__V_CMPX_EQ_U16::~Inst_VOPC__V_CMPX_EQ_U16() + { + } // ~Inst_VOPC__V_CMPX_EQ_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_U16 class methods --- + + Inst_VOPC__V_CMPX_LE_U16::Inst_VOPC__V_CMPX_LE_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_U16 + + Inst_VOPC__V_CMPX_LE_U16::~Inst_VOPC__V_CMPX_LE_U16() + { + } // ~Inst_VOPC__V_CMPX_LE_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_U16 class methods --- + + Inst_VOPC__V_CMPX_GT_U16::Inst_VOPC__V_CMPX_GT_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_U16 + + Inst_VOPC__V_CMPX_GT_U16::~Inst_VOPC__V_CMPX_GT_U16() + { + } // ~Inst_VOPC__V_CMPX_GT_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NE_U16 class methods --- + + Inst_VOPC__V_CMPX_NE_U16::Inst_VOPC__V_CMPX_NE_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ne_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NE_U16 + + Inst_VOPC__V_CMPX_NE_U16::~Inst_VOPC__V_CMPX_NE_U16() + { + } // ~Inst_VOPC__V_CMPX_NE_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_U16 class methods --- + + Inst_VOPC__V_CMPX_GE_U16::Inst_VOPC__V_CMPX_GE_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_U16 + + Inst_VOPC__V_CMPX_GE_U16::~Inst_VOPC__V_CMPX_GE_U16() + { + } // ~Inst_VOPC__V_CMPX_GE_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU16 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_T_U16 class methods --- + + Inst_VOPC__V_CMPX_T_U16::Inst_VOPC__V_CMPX_T_U16(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_t_u16") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_T_U16 + + Inst_VOPC__V_CMPX_T_U16::~Inst_VOPC__V_CMPX_T_U16() + { + } // ~Inst_VOPC__V_CMPX_T_U16 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_F_I32 class methods --- + + Inst_VOPC__V_CMP_F_I32::Inst_VOPC__V_CMP_F_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_F_I32 + + Inst_VOPC__V_CMP_F_I32::~Inst_VOPC__V_CMP_F_I32() + { + } // ~Inst_VOPC__V_CMP_F_I32 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_I32 class methods --- + + Inst_VOPC__V_CMP_LT_I32::Inst_VOPC__V_CMP_LT_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LT_I32 + + Inst_VOPC__V_CMP_LT_I32::~Inst_VOPC__V_CMP_LT_I32() + { + } // ~Inst_VOPC__V_CMP_LT_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_I32 class methods --- + + Inst_VOPC__V_CMP_EQ_I32::Inst_VOPC__V_CMP_EQ_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_EQ_I32 + + Inst_VOPC__V_CMP_EQ_I32::~Inst_VOPC__V_CMP_EQ_I32() + { + } // ~Inst_VOPC__V_CMP_EQ_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_I32 class methods --- + + Inst_VOPC__V_CMP_LE_I32::Inst_VOPC__V_CMP_LE_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LE_I32 + + Inst_VOPC__V_CMP_LE_I32::~Inst_VOPC__V_CMP_LE_I32() + { + } // ~Inst_VOPC__V_CMP_LE_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_I32 class methods --- + + Inst_VOPC__V_CMP_GT_I32::Inst_VOPC__V_CMP_GT_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GT_I32 + + Inst_VOPC__V_CMP_GT_I32::~Inst_VOPC__V_CMP_GT_I32() + { + } // ~Inst_VOPC__V_CMP_GT_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NE_I32 class methods --- + + Inst_VOPC__V_CMP_NE_I32::Inst_VOPC__V_CMP_NE_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ne_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_NE_I32 + + Inst_VOPC__V_CMP_NE_I32::~Inst_VOPC__V_CMP_NE_I32() + { + } // ~Inst_VOPC__V_CMP_NE_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_I32 class methods --- + + Inst_VOPC__V_CMP_GE_I32::Inst_VOPC__V_CMP_GE_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GE_I32 + + Inst_VOPC__V_CMP_GE_I32::~Inst_VOPC__V_CMP_GE_I32() + { + } // ~Inst_VOPC__V_CMP_GE_I32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_T_I32 class methods --- + + Inst_VOPC__V_CMP_T_I32::Inst_VOPC__V_CMP_T_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_t_i32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_T_I32 + + Inst_VOPC__V_CMP_T_I32::~Inst_VOPC__V_CMP_T_I32() + { + } // ~Inst_VOPC__V_CMP_T_I32 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_F_U32 class methods --- + + Inst_VOPC__V_CMP_F_U32::Inst_VOPC__V_CMP_F_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_F_U32 + + Inst_VOPC__V_CMP_F_U32::~Inst_VOPC__V_CMP_F_U32() + { + } // ~Inst_VOPC__V_CMP_F_U32 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_U32 class methods --- + + Inst_VOPC__V_CMP_LT_U32::Inst_VOPC__V_CMP_LT_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LT_U32 + + Inst_VOPC__V_CMP_LT_U32::~Inst_VOPC__V_CMP_LT_U32() + { + } // ~Inst_VOPC__V_CMP_LT_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_U32 class methods --- + + Inst_VOPC__V_CMP_EQ_U32::Inst_VOPC__V_CMP_EQ_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_EQ_U32 + + Inst_VOPC__V_CMP_EQ_U32::~Inst_VOPC__V_CMP_EQ_U32() + { + } // ~Inst_VOPC__V_CMP_EQ_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_U32 class methods --- + + Inst_VOPC__V_CMP_LE_U32::Inst_VOPC__V_CMP_LE_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LE_U32 + + Inst_VOPC__V_CMP_LE_U32::~Inst_VOPC__V_CMP_LE_U32() + { + } // ~Inst_VOPC__V_CMP_LE_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_U32 class methods --- + + Inst_VOPC__V_CMP_GT_U32::Inst_VOPC__V_CMP_GT_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GT_U32 + + Inst_VOPC__V_CMP_GT_U32::~Inst_VOPC__V_CMP_GT_U32() + { + } // ~Inst_VOPC__V_CMP_GT_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NE_U32 class methods --- + + Inst_VOPC__V_CMP_NE_U32::Inst_VOPC__V_CMP_NE_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ne_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_NE_U32 + + Inst_VOPC__V_CMP_NE_U32::~Inst_VOPC__V_CMP_NE_U32() + { + } // ~Inst_VOPC__V_CMP_NE_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_U32 class methods --- + + Inst_VOPC__V_CMP_GE_U32::Inst_VOPC__V_CMP_GE_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GE_U32 + + Inst_VOPC__V_CMP_GE_U32::~Inst_VOPC__V_CMP_GE_U32() + { + } // ~Inst_VOPC__V_CMP_GE_U32 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_T_U32 class methods --- + + Inst_VOPC__V_CMP_T_U32::Inst_VOPC__V_CMP_T_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_t_u32") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_T_U32 + + Inst_VOPC__V_CMP_T_U32::~Inst_VOPC__V_CMP_T_U32() + { + } // ~Inst_VOPC__V_CMP_T_U32 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_I32 class methods --- + + Inst_VOPC__V_CMPX_F_I32::Inst_VOPC__V_CMPX_F_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_I32 + + Inst_VOPC__V_CMPX_F_I32::~Inst_VOPC__V_CMPX_F_I32() + { + } // ~Inst_VOPC__V_CMPX_F_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_I32 class methods --- + + Inst_VOPC__V_CMPX_LT_I32::Inst_VOPC__V_CMPX_LT_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_I32 + + Inst_VOPC__V_CMPX_LT_I32::~Inst_VOPC__V_CMPX_LT_I32() + { + } // ~Inst_VOPC__V_CMPX_LT_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_I32 class methods --- + + Inst_VOPC__V_CMPX_EQ_I32::Inst_VOPC__V_CMPX_EQ_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_I32 + + Inst_VOPC__V_CMPX_EQ_I32::~Inst_VOPC__V_CMPX_EQ_I32() + { + } // ~Inst_VOPC__V_CMPX_EQ_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_I32 class methods --- + + Inst_VOPC__V_CMPX_LE_I32::Inst_VOPC__V_CMPX_LE_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_I32 + + Inst_VOPC__V_CMPX_LE_I32::~Inst_VOPC__V_CMPX_LE_I32() + { + } // ~Inst_VOPC__V_CMPX_LE_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_I32 class methods --- + + Inst_VOPC__V_CMPX_GT_I32::Inst_VOPC__V_CMPX_GT_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_I32 + + Inst_VOPC__V_CMPX_GT_I32::~Inst_VOPC__V_CMPX_GT_I32() + { + } // ~Inst_VOPC__V_CMPX_GT_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NE_I32 class methods --- + + Inst_VOPC__V_CMPX_NE_I32::Inst_VOPC__V_CMPX_NE_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ne_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NE_I32 + + Inst_VOPC__V_CMPX_NE_I32::~Inst_VOPC__V_CMPX_NE_I32() + { + } // ~Inst_VOPC__V_CMPX_NE_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_I32 class methods --- + + Inst_VOPC__V_CMPX_GE_I32::Inst_VOPC__V_CMPX_GE_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_I32 + + Inst_VOPC__V_CMPX_GE_I32::~Inst_VOPC__V_CMPX_GE_I32() + { + } // ~Inst_VOPC__V_CMPX_GE_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_T_I32 class methods --- + + Inst_VOPC__V_CMPX_T_I32::Inst_VOPC__V_CMPX_T_I32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_t_i32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_T_I32 + + Inst_VOPC__V_CMPX_T_I32::~Inst_VOPC__V_CMPX_T_I32() + { + } // ~Inst_VOPC__V_CMPX_T_I32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_U32 class methods --- + + Inst_VOPC__V_CMPX_F_U32::Inst_VOPC__V_CMPX_F_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_U32 + + Inst_VOPC__V_CMPX_F_U32::~Inst_VOPC__V_CMPX_F_U32() + { + } // ~Inst_VOPC__V_CMPX_F_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_U32 class methods --- + + Inst_VOPC__V_CMPX_LT_U32::Inst_VOPC__V_CMPX_LT_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_U32 + + Inst_VOPC__V_CMPX_LT_U32::~Inst_VOPC__V_CMPX_LT_U32() + { + } // ~Inst_VOPC__V_CMPX_LT_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_U32 class methods --- + + Inst_VOPC__V_CMPX_EQ_U32::Inst_VOPC__V_CMPX_EQ_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_U32 + + Inst_VOPC__V_CMPX_EQ_U32::~Inst_VOPC__V_CMPX_EQ_U32() + { + } // ~Inst_VOPC__V_CMPX_EQ_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_U32 class methods --- + + Inst_VOPC__V_CMPX_LE_U32::Inst_VOPC__V_CMPX_LE_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_U32 + + Inst_VOPC__V_CMPX_LE_U32::~Inst_VOPC__V_CMPX_LE_U32() + { + } // ~Inst_VOPC__V_CMPX_LE_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_U32 class methods --- + + Inst_VOPC__V_CMPX_GT_U32::Inst_VOPC__V_CMPX_GT_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_U32 + + Inst_VOPC__V_CMPX_GT_U32::~Inst_VOPC__V_CMPX_GT_U32() + { + } // ~Inst_VOPC__V_CMPX_GT_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NE_U32 class methods --- + + Inst_VOPC__V_CMPX_NE_U32::Inst_VOPC__V_CMPX_NE_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ne_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NE_U32 + + Inst_VOPC__V_CMPX_NE_U32::~Inst_VOPC__V_CMPX_NE_U32() + { + } // ~Inst_VOPC__V_CMPX_NE_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_U32 class methods --- + + Inst_VOPC__V_CMPX_GE_U32::Inst_VOPC__V_CMPX_GE_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_U32 + + Inst_VOPC__V_CMPX_GE_U32::~Inst_VOPC__V_CMPX_GE_U32() + { + } // ~Inst_VOPC__V_CMPX_GE_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU32 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_T_U32 class methods --- + + Inst_VOPC__V_CMPX_T_U32::Inst_VOPC__V_CMPX_T_U32(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_t_u32") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_T_U32 + + Inst_VOPC__V_CMPX_T_U32::~Inst_VOPC__V_CMPX_T_U32() + { + } // ~Inst_VOPC__V_CMPX_T_U32 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_F_I64 class methods --- + + Inst_VOPC__V_CMP_F_I64::Inst_VOPC__V_CMP_F_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_F_I64 + + Inst_VOPC__V_CMP_F_I64::~Inst_VOPC__V_CMP_F_I64() + { + } // ~Inst_VOPC__V_CMP_F_I64 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_I64 class methods --- + + Inst_VOPC__V_CMP_LT_I64::Inst_VOPC__V_CMP_LT_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LT_I64 + + Inst_VOPC__V_CMP_LT_I64::~Inst_VOPC__V_CMP_LT_I64() + { + } // ~Inst_VOPC__V_CMP_LT_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_I64 class methods --- + + Inst_VOPC__V_CMP_EQ_I64::Inst_VOPC__V_CMP_EQ_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_EQ_I64 + + Inst_VOPC__V_CMP_EQ_I64::~Inst_VOPC__V_CMP_EQ_I64() + { + } // ~Inst_VOPC__V_CMP_EQ_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_I64 class methods --- + + Inst_VOPC__V_CMP_LE_I64::Inst_VOPC__V_CMP_LE_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LE_I64 + + Inst_VOPC__V_CMP_LE_I64::~Inst_VOPC__V_CMP_LE_I64() + { + } // ~Inst_VOPC__V_CMP_LE_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_I64 class methods --- + + Inst_VOPC__V_CMP_GT_I64::Inst_VOPC__V_CMP_GT_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GT_I64 + + Inst_VOPC__V_CMP_GT_I64::~Inst_VOPC__V_CMP_GT_I64() + { + } // ~Inst_VOPC__V_CMP_GT_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NE_I64 class methods --- + + Inst_VOPC__V_CMP_NE_I64::Inst_VOPC__V_CMP_NE_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ne_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_NE_I64 + + Inst_VOPC__V_CMP_NE_I64::~Inst_VOPC__V_CMP_NE_I64() + { + } // ~Inst_VOPC__V_CMP_NE_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_I64 class methods --- + + Inst_VOPC__V_CMP_GE_I64::Inst_VOPC__V_CMP_GE_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GE_I64 + + Inst_VOPC__V_CMP_GE_I64::~Inst_VOPC__V_CMP_GE_I64() + { + } // ~Inst_VOPC__V_CMP_GE_I64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_T_I64 class methods --- + + Inst_VOPC__V_CMP_T_I64::Inst_VOPC__V_CMP_T_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_t_i64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_T_I64 + + Inst_VOPC__V_CMP_T_I64::~Inst_VOPC__V_CMP_T_I64() + { + } // ~Inst_VOPC__V_CMP_T_I64 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_F_U64 class methods --- + + Inst_VOPC__V_CMP_F_U64::Inst_VOPC__V_CMP_F_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_f_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_F_U64 + + Inst_VOPC__V_CMP_F_U64::~Inst_VOPC__V_CMP_F_U64() + { + } // ~Inst_VOPC__V_CMP_F_U64 + + // --- description from .arch file --- + // D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LT_U64 class methods --- + + Inst_VOPC__V_CMP_LT_U64::Inst_VOPC__V_CMP_LT_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_lt_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LT_U64 + + Inst_VOPC__V_CMP_LT_U64::~Inst_VOPC__V_CMP_LT_U64() + { + } // ~Inst_VOPC__V_CMP_LT_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_EQ_U64 class methods --- + + Inst_VOPC__V_CMP_EQ_U64::Inst_VOPC__V_CMP_EQ_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_eq_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_EQ_U64 + + Inst_VOPC__V_CMP_EQ_U64::~Inst_VOPC__V_CMP_EQ_U64() + { + } // ~Inst_VOPC__V_CMP_EQ_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_LE_U64 class methods --- + + Inst_VOPC__V_CMP_LE_U64::Inst_VOPC__V_CMP_LE_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_le_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_LE_U64 + + Inst_VOPC__V_CMP_LE_U64::~Inst_VOPC__V_CMP_LE_U64() + { + } // ~Inst_VOPC__V_CMP_LE_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GT_U64 class methods --- + + Inst_VOPC__V_CMP_GT_U64::Inst_VOPC__V_CMP_GT_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_gt_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GT_U64 + + Inst_VOPC__V_CMP_GT_U64::~Inst_VOPC__V_CMP_GT_U64() + { + } // ~Inst_VOPC__V_CMP_GT_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_NE_U64 class methods --- + + Inst_VOPC__V_CMP_NE_U64::Inst_VOPC__V_CMP_NE_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ne_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_NE_U64 + + Inst_VOPC__V_CMP_NE_U64::~Inst_VOPC__V_CMP_NE_U64() + { + } // ~Inst_VOPC__V_CMP_NE_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_GE_U64 class methods --- + + Inst_VOPC__V_CMP_GE_U64::Inst_VOPC__V_CMP_GE_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_ge_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_GE_U64 + + Inst_VOPC__V_CMP_GE_U64::~Inst_VOPC__V_CMP_GE_U64() + { + } // ~Inst_VOPC__V_CMP_GE_U64 + + // --- description from .arch file --- + // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMP_T_U64 class methods --- + + Inst_VOPC__V_CMP_T_U64::Inst_VOPC__V_CMP_T_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmp_t_u64") + { + setFlag(ALU); + } // Inst_VOPC__V_CMP_T_U64 + + Inst_VOPC__V_CMP_T_U64::~Inst_VOPC__V_CMP_T_U64() + { + } // ~Inst_VOPC__V_CMP_T_U64 + + // --- description from .arch file --- + // D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_I64 class methods --- + + Inst_VOPC__V_CMPX_F_I64::Inst_VOPC__V_CMPX_F_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_I64 + + Inst_VOPC__V_CMPX_F_I64::~Inst_VOPC__V_CMPX_F_I64() + { + } // ~Inst_VOPC__V_CMPX_F_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_I64 class methods --- + + Inst_VOPC__V_CMPX_LT_I64::Inst_VOPC__V_CMPX_LT_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_I64 + + Inst_VOPC__V_CMPX_LT_I64::~Inst_VOPC__V_CMPX_LT_I64() + { + } // ~Inst_VOPC__V_CMPX_LT_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_I64 class methods --- + + Inst_VOPC__V_CMPX_EQ_I64::Inst_VOPC__V_CMPX_EQ_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_I64 + + Inst_VOPC__V_CMPX_EQ_I64::~Inst_VOPC__V_CMPX_EQ_I64() + { + } // ~Inst_VOPC__V_CMPX_EQ_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_I64 class methods --- + + Inst_VOPC__V_CMPX_LE_I64::Inst_VOPC__V_CMPX_LE_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_I64 + + Inst_VOPC__V_CMPX_LE_I64::~Inst_VOPC__V_CMPX_LE_I64() + { + } // ~Inst_VOPC__V_CMPX_LE_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_I64 class methods --- + + Inst_VOPC__V_CMPX_GT_I64::Inst_VOPC__V_CMPX_GT_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_I64 + + Inst_VOPC__V_CMPX_GT_I64::~Inst_VOPC__V_CMPX_GT_I64() + { + } // ~Inst_VOPC__V_CMPX_GT_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NE_I64 class methods --- + + Inst_VOPC__V_CMPX_NE_I64::Inst_VOPC__V_CMPX_NE_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ne_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NE_I64 + + Inst_VOPC__V_CMPX_NE_I64::~Inst_VOPC__V_CMPX_NE_I64() + { + } // ~Inst_VOPC__V_CMPX_NE_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 
1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_I64 class methods --- + + Inst_VOPC__V_CMPX_GE_I64::Inst_VOPC__V_CMPX_GE_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_I64 + + Inst_VOPC__V_CMPX_GE_I64::~Inst_VOPC__V_CMPX_GE_I64() + { + } // ~Inst_VOPC__V_CMPX_GE_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandI64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_T_I64 class methods --- + + Inst_VOPC__V_CMPX_T_I64::Inst_VOPC__V_CMPX_T_I64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_t_i64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_T_I64 + + Inst_VOPC__V_CMPX_T_I64::~Inst_VOPC__V_CMPX_T_I64() + { + } // ~Inst_VOPC__V_CMPX_T_I64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_F_U64 class methods --- + + Inst_VOPC__V_CMPX_F_U64::Inst_VOPC__V_CMPX_F_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_f_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_F_U64 + + Inst_VOPC__V_CMPX_F_U64::~Inst_VOPC__V_CMPX_F_U64() + { + } // ~Inst_VOPC__V_CMPX_F_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LT_U64 class methods --- + + Inst_VOPC__V_CMPX_LT_U64::Inst_VOPC__V_CMPX_LT_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_lt_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LT_U64 + + Inst_VOPC__V_CMPX_LT_U64::~Inst_VOPC__V_CMPX_LT_U64() + { + } // ~Inst_VOPC__V_CMPX_LT_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_EQ_U64 class methods --- + + Inst_VOPC__V_CMPX_EQ_U64::Inst_VOPC__V_CMPX_EQ_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_eq_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_EQ_U64 + + Inst_VOPC__V_CMPX_EQ_U64::~Inst_VOPC__V_CMPX_EQ_U64() + { + } // ~Inst_VOPC__V_CMPX_EQ_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_LE_U64 class methods --- + + Inst_VOPC__V_CMPX_LE_U64::Inst_VOPC__V_CMPX_LE_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_le_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_LE_U64 + + Inst_VOPC__V_CMPX_LE_U64::~Inst_VOPC__V_CMPX_LE_U64() + { + } // ~Inst_VOPC__V_CMPX_LE_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GT_U64 class methods --- + + Inst_VOPC__V_CMPX_GT_U64::Inst_VOPC__V_CMPX_GT_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_gt_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GT_U64 + + Inst_VOPC__V_CMPX_GT_U64::~Inst_VOPC__V_CMPX_GT_U64() + { + } // ~Inst_VOPC__V_CMPX_GT_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_NE_U64 class methods --- + + Inst_VOPC__V_CMPX_NE_U64::Inst_VOPC__V_CMPX_NE_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ne_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_NE_U64 + + Inst_VOPC__V_CMPX_NE_U64::~Inst_VOPC__V_CMPX_NE_U64() + { + } // ~Inst_VOPC__V_CMPX_NE_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_GE_U64 class methods --- + + Inst_VOPC__V_CMPX_GE_U64::Inst_VOPC__V_CMPX_GE_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_ge_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_GE_U64 + + Inst_VOPC__V_CMPX_GE_U64::~Inst_VOPC__V_CMPX_GE_U64() + { + } // ~Inst_VOPC__V_CMPX_GE_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding. + void + Inst_VOPC__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src0(gpuDynInst, instData.SRC0); + ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + src0.readSrc(); + src1.read(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute + // --- Inst_VOPC__V_CMPX_T_U64 class methods --- + + Inst_VOPC__V_CMPX_T_U64::Inst_VOPC__V_CMPX_T_U64(InFmt_VOPC *iFmt) + : Inst_VOPC(iFmt, "v_cmpx_t_u64") + { + setFlag(ALU); + setFlag(WritesEXEC); + } // Inst_VOPC__V_CMPX_T_U64 + + Inst_VOPC__V_CMPX_T_U64::~Inst_VOPC__V_CMPX_T_U64() + { + } // ~Inst_VOPC__V_CMPX_T_U64 + + // --- description from .arch file --- + // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding. 
+ void + Inst_VOPC__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vcc.setBit(lane, 1); + } + } + + wf->execMask() = vcc.rawData(); + vcc.write(); + } // execute +} // namespace VegaISA +} // namespace gem5 diff --git a/src/arch/amdgpu/vega/operand.hh b/src/arch/amdgpu/vega/operand.hh index 1760bd7213..1bb9b43d1f 100644 --- a/src/arch/amdgpu/vega/operand.hh +++ b/src/arch/amdgpu/vega/operand.hh @@ -37,6 +37,7 @@ #include "arch/amdgpu/vega/gpu_registers.hh" #include "arch/generic/vec_reg.hh" #include "gpu-compute/scalar_register_file.hh" +#include "gpu-compute/shader.hh" #include "gpu-compute/vector_register_file.hh" #include "gpu-compute/wavefront.hh" @@ -489,7 +490,7 @@ namespace VegaISA typename std::enable_if::type setBit(int bit, int bit_val) { - DataType &sgpr = *((DataType*)srfData.data()); + GEM5_ALIGNED(8) DataType &sgpr = *((DataType*)srfData.data()); replaceBits(sgpr, bit, bit_val); } @@ -513,15 +514,49 @@ namespace VegaISA { assert(NumDwords == 1 || NumDwords == 2); + if (_opIdx >= REG_INT_CONST_POS_MIN && + _opIdx <= REG_INT_CONST_NEG_MAX) { + assert(sizeof(DataType) <= sizeof(srfData)); + DataType misc_val(0); + assert(isConstVal(_opIdx)); + misc_val = (DataType)_gpuDynInst + ->readConstVal(_opIdx); + std::memcpy((void*)srfData.data(), (void*)&misc_val, + sizeof(DataType)); + + return; + } + + if (_opIdx == REG_M0 || _opIdx == REG_ZERO || _opIdx == REG_SCC) { + assert(sizeof(DataType) <= sizeof(srfData)); + DataType misc_val(0); + misc_val = (DataType)_gpuDynInst->readMiscReg(_opIdx); + std::memcpy((void*)srfData.data(), (void*)&misc_val, + sizeof(DataType)); + + return; + } + switch(_opIdx) { case REG_EXEC_LO: { - ScalarRegU64 exec_mask = _gpuDynInst->wavefront()-> - execMask().to_ullong(); - std::memcpy((void*)srfData.data(), (void*)&exec_mask, - sizeof(exec_mask)); 
- DPRINTF(GPUSRF, "Read EXEC\n"); - DPRINTF(GPUSRF, "EXEC = %#x\n", exec_mask); + if constexpr (NumDwords == 2) { + ScalarRegU64 exec_mask = _gpuDynInst->wavefront()-> + execMask().to_ullong(); + std::memcpy((void*)srfData.data(), (void*)&exec_mask, + sizeof(exec_mask)); + DPRINTF(GPUSRF, "Read EXEC\n"); + DPRINTF(GPUSRF, "EXEC = %#x\n", exec_mask); + } else { + ScalarRegU64 exec_mask = _gpuDynInst->wavefront()-> + execMask().to_ullong(); + + ScalarRegU32 exec_mask_lo = bits(exec_mask, 31, 0); + std::memcpy((void*)srfData.data(), + (void*)&exec_mask_lo, sizeof(exec_mask_lo)); + DPRINTF(GPUSRF, "Read EXEC_LO\n"); + DPRINTF(GPUSRF, "EXEC_LO = %#x\n", exec_mask_lo); + } } break; case REG_EXEC_HI: @@ -544,8 +579,83 @@ namespace VegaISA case REG_SRC_SWDA: case REG_SRC_DPP: case REG_SRC_LITERAL: - assert(NumDwords == 1); + /** + * From the Vega specification: + * When a literal constant is used with a 64 bit instruction, + * the literal is expanded to 64 bits by: padding the LSBs + * with zeros for floats, padding the MSBs with zeros for + * unsigned ints, and by sign-extending signed ints. 
+ */ srfData[0] = _gpuDynInst->srcLiteral(); + if constexpr (NumDwords == 2) { + if constexpr (std::is_integral_v) { + if constexpr (std::is_signed_v) { + if (bits(srfData[0], 31, 31) == 1) { + srfData[1] = 0xffffffff; + } else { + srfData[1] = 0; + } + } else { + srfData[1] = 0; + } + } else { + srfData[1] = _gpuDynInst->srcLiteral(); + srfData[0] = 0; + } + } + break; + case REG_SHARED_BASE: + { + assert(NumDwords == 2); + if constexpr (NumDwords == 2) { + ComputeUnit *cu = _gpuDynInst->computeUnit(); + ScalarRegU64 shared_base = cu->shader->ldsApe().base; + std::memcpy((void*)srfData.data(), (void*)&shared_base, + sizeof(srfData)); + DPRINTF(GPUSRF, "Read SHARED_BASE = %#x\n", + shared_base); + } + } + break; + case REG_SHARED_LIMIT: + { + assert(NumDwords == 2); + if constexpr (NumDwords == 2) { + ComputeUnit *cu = _gpuDynInst->computeUnit(); + ScalarRegU64 shared_limit = cu->shader->ldsApe().limit; + std::memcpy((void*)srfData.data(), + (void*)&shared_limit, sizeof(srfData)); + DPRINTF(GPUSRF, "Read SHARED_LIMIT = %#x\n", + shared_limit); + } + } + break; + case REG_PRIVATE_BASE: + { + assert(NumDwords == 2); + if constexpr (NumDwords == 2) { + ComputeUnit *cu = _gpuDynInst->computeUnit(); + ScalarRegU64 priv_base = cu->shader->scratchApe().base; + std::memcpy((void*)srfData.data(), (void*)&priv_base, + sizeof(srfData)); + DPRINTF(GPUSRF, "Read PRIVATE_BASE = %#x\n", + priv_base); + } + } + break; + case REG_PRIVATE_LIMIT: + { + assert(NumDwords == 2); + if constexpr (NumDwords == 2) { + ComputeUnit *cu = _gpuDynInst->computeUnit(); + ScalarRegU64 priv_limit = + cu->shader->scratchApe().limit; + std::memcpy((void*)srfData.data(), (void*)&priv_limit, + sizeof(srfData)); + DPRINTF(GPUSRF, "Read PRIVATE_LIMIT = %#x\n", + priv_limit); + } + } break; case REG_POS_HALF: { @@ -617,18 +727,8 @@ namespace VegaISA } break; default: - { - assert(sizeof(DataType) <= sizeof(srfData)); - DataType misc_val(0); - if (isConstVal(_opIdx)) { - misc_val = (DataType)_gpuDynInst - 
->readConstVal(_opIdx); - } else { - misc_val = (DataType)_gpuDynInst->readMiscReg(_opIdx); - } - std::memcpy((void*)srfData.data(), (void*)&misc_val, - sizeof(DataType)); - } + panic("Invalid special register index: %d\n", _opIdx); + break; } } @@ -674,7 +774,7 @@ namespace VegaISA * of a register is 1 dword. this class will take care to do the * proper packing/unpacking of sub-dword operands. */ - std::array srfData; + GEM5_ALIGNED(8) std::array srfData; }; // typedefs for the various sizes/types of scalar operands @@ -735,6 +835,142 @@ namespace VegaISA using ConstVecOperandU128 = VecOperand; using ConstVecOperandU256 = VecOperand; using ConstVecOperandU512 = VecOperand; + + +// Helper class for using multiple VecElemU32 to represent data types which +// do not divide a dword evenly. +template +class PackedReg +{ + // Logical view is: + // dword N, dword N - 1, ..., dword 1, dword 0. + // Within each dword, the element starts at [ELEM_SIZE:0]. For example, + // for ELEM_SIZE = 6 for fp6 types, [5:0] is the first value, [11:6] is + // the second, and so forth. For 6 bits specifically, the 6th element + // spans dword 0 and dword 1. + static_assert(BITS % 32 == 0); + static_assert(BITS % ELEM_SIZE == 0); + static_assert(ELEM_SIZE <= 32); + + static constexpr int NumDwords = BITS / 32; + uint32_t dwords[NumDwords] = {}; + + public: + PackedReg() = default; + + void + setDword(int dw, uint32_t value) + { + assert(dw < NumDwords); + dwords[dw] = value; + } + + uint32_t + getDword(int dw) + { + assert(dw < NumDwords); + return dwords[dw]; + } + + uint32_t + getElem(int elem) + { + assert(elem < (BITS / ELEM_SIZE)); + + // Get the upper/lower *bit* location of the element. + int ubit, lbit; + ubit = elem * ELEM_SIZE + (ELEM_SIZE - 1); + lbit = elem * ELEM_SIZE; + + // Convert the bit locations to upper/lower dwords. It is possible + // to span two dwords but this does not have to support spanning + // more than two dwords. 
+ int udw, ldw; + udw = ubit / 32; + ldw = lbit / 32; + assert(udw == ldw || udw == ldw + 1); + + if (udw == ldw) { + // Easy case, just shift the dword value and mask to get value. + int dw_lbit = lbit % 32; + + uint32_t elem_mask = (1UL << ELEM_SIZE) - 1; + uint32_t rv = (dwords[ldw] >> dw_lbit) & elem_mask; + + return rv; + } + + // Harder case. To make it easier put into a quad word and shift + // that variable instead of trying to work with two. + uint64_t qword = + uint64_t(dwords[udw]) << 32 | uint64_t(dwords[ldw]); + + int qw_lbit = lbit % 32; + + uint64_t elem_mask = (1ULL << ELEM_SIZE) - 1; + uint32_t rv = uint32_t((qword >> qw_lbit) & elem_mask); + + return rv; + } + + void + setElem(int elem, uint32_t value) + { + assert(elem < (BITS / ELEM_SIZE)); + + // Get the upper/lower *bit* location of the element. + int ubit, lbit; + ubit = elem * ELEM_SIZE + (ELEM_SIZE - 1); + lbit = elem * ELEM_SIZE; + + // Convert the bit locations to upper/lower dwords. It is possible + // to span two dwords but this does not have to support spanning + // more than two dwords. + int udw, ldw; + udw = ubit / 32; + ldw = lbit / 32; + assert(udw == ldw || udw == ldw + 1); + + if (udw == ldw) { + // Easy case, just shift the dword value and mask to get value. + int dw_lbit = lbit % 32; + + // Make sure the value is not going to clobber another element. + uint32_t elem_mask = (1UL << ELEM_SIZE) - 1; + value &= elem_mask; + + // Clear the bits we are setting. + elem_mask <<= dw_lbit; + dwords[ldw] &= ~elem_mask; + + value <<= dw_lbit; + dwords[ldw] |= value; + + return; + } + + // Harder case. Put the two dwords in a quad word and manipulate that. + // Then place the two new dwords back into the storage. + uint64_t qword = + uint64_t(dwords[udw]) << 32 | uint64_t(dwords[ldw]); + + int qw_lbit = lbit % 32; + + // Make sure the value is not going to clobber another element. 
+ uint64_t elem_mask = (1ULL << ELEM_SIZE) - 1; + value &= elem_mask; + + elem_mask <<= qw_lbit; + qword &= elem_mask; + + value <<= qw_lbit; + qword |= value; + + dwords[udw] = uint32_t(qword >> 32); + dwords[ldw] = uint32_t(qword & mask(32)); + } +}; + } } // namespace gem5 diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py index 8e8d2b641c..8b9cf25b25 100644 --- a/src/arch/arm/ArmISA.py +++ b/src/arch/arm/ArmISA.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012-2013, 2015-2022 ARM Limited +# Copyright (c) 2012-2013, 2015-2022, 2024 Arm Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -52,7 +52,11 @@ class DecoderFlavor(Enum): class ArmDefaultSERelease(ArmRelease): extensions = [ - "CRYPTO", + "FEAT_AES", + "FEAT_PMULL", + "FEAT_SHA1", + "FEAT_SHA256", + "FEAT_CRC32", # Armv8.1 "FEAT_LSE", "FEAT_RDM", @@ -166,6 +170,10 @@ class ArmISA(BaseISA): 0x0000000000010010, "AArch64 Memory Model Feature Register 2" ) + # HAS_SDEFLT | HAS_FORCE_NS | HAS_TIDR | PMG_MAX = 128 | + # VPMR_MAX = 7 | HAS_HCR | PARTID_MAX = 256 + mpamidr_el1 = Param.UInt64(0x34000080001E0100, "MPAM ID Register (EL1)") + # Any access (read/write) to an unimplemented # Implementation Defined registers is not causing an Undefined Instruction. # It is rather executed as a NOP. diff --git a/src/arch/arm/ArmSemihosting.py b/src/arch/arm/ArmSemihosting.py index 8c8375e208..ffc285fc8f 100644 --- a/src/arch/arm/ArmSemihosting.py +++ b/src/arch/arm/ArmSemihosting.py @@ -33,36 +33,10 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from m5.objects.Serial import SerialDevice -from m5.objects.Terminal import Terminal -from m5.params import * -from m5.SimObject import * +from m5.objects.BaseSemihosting import BaseSemihosting -class ArmSemihosting(SimObject): +class ArmSemihosting(BaseSemihosting): type = "ArmSemihosting" cxx_header = "arch/arm/semihosting.hh" cxx_class = "gem5::ArmSemihosting" - - cmd_line = Param.String("", "Command line to report to guest") - stdin = Param.String("stdin", "Standard input (stdin for gem5's terminal)") - stdout = Param.String( - "stdout", "Standard output (stdout for gem5's terminal)" - ) - stderr = Param.String( - "stderr", "Standard error (stderr for gem5's terminal)" - ) - files_root_dir = Param.String( - "", "Host root directory for files handled by Semihosting" - ) - - mem_reserve = Param.MemorySize( - "32MiB", - "Amount of memory to reserve at the start of the address map. This " - "memory won't be used by the heap reported to an application.", - ) - stack_size = Param.MemorySize("32MiB", "Application stack size") - - time = Param.Time( - "01/01/2009", "System time to use ('Now' for actual time)" - ) diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py index dc138cafc3..94cbb496aa 100644 --- a/src/arch/arm/ArmSystem.py +++ b/src/arch/arm/ArmSystem.py @@ -1,4 +1,4 @@ -# Copyright (c) 2009, 2012-2013, 2015-2023 Arm Limited +# Copyright (c) 2009, 2012-2013, 2015-2024 Arm Limited # All rights reserved. 
# # The license below extends only to copyright in the software and shall @@ -65,6 +65,11 @@ class SmeVectorLength(UInt8): class ArmExtension(ScopedEnum): vals = [ + "FEAT_AES", + "FEAT_PMULL", + "FEAT_SHA1", + "FEAT_SHA256", + "FEAT_CRC32", # Armv8.1 "FEAT_VHE", "FEAT_PAN", @@ -109,8 +114,8 @@ class ArmExtension(ScopedEnum): "SECURITY", "LPAE", "VIRTUALIZATION", - "CRYPTO", "TME", + "FEAT_MPAM", ] @@ -159,7 +164,16 @@ class ArmRelease(SimObject): class Armv8(ArmRelease): - extensions = ["LPAE", "VIRTUALIZATION", "SECURITY"] + extensions = [ + "LPAE", + "VIRTUALIZATION", + "SECURITY", + "FEAT_AES", + "FEAT_PMULL", + "FEAT_SHA1", + "FEAT_SHA256", + "FEAT_CRC32", + ] class ArmDefaultRelease(Armv8): diff --git a/src/arch/arm/SConscript b/src/arch/arm/SConscript index 0aa4e66659..948cefb4e4 100644 --- a/src/arch/arm/SConscript +++ b/src/arch/arm/SConscript @@ -1,6 +1,6 @@ # -*- mode:python -*- -# Copyright (c) 2009, 2012-2013, 2017-2018, 2020 ARM Limited +# Copyright (c) 2009, 2012-2013, 2017-2018, 2020, 2024 Arm Limited # All rights reserved. 
# # The license below extends only to copyright in the software and shall @@ -40,6 +40,8 @@ Import('*') +Source('insts/fplib.cc') + if env['CONF']['USE_ARM_ISA']: env.TagImplies('arm isa', 'gem5 lib') @@ -73,7 +75,6 @@ Source('insts/static_inst.cc', tags='arm isa') Source('insts/sve.cc', tags='arm isa') Source('insts/sve_mem.cc', tags='arm isa') Source('insts/vfp.cc', tags='arm isa') -Source('insts/fplib.cc', tags='arm isa') Source('insts/crypto.cc', tags='arm isa') Source('insts/tme64.cc', tags='arm isa') if env['CONF']['PROTOCOL'] == 'MESI_Three_Level_HTM': @@ -92,6 +93,7 @@ Source('fs_workload.cc', tags='arm isa') Source('regs/int.cc', tags='arm isa') Source('regs/misc.cc', tags='arm isa') Source('mmu.cc', tags='arm isa') +Source('mpam.cc', tags='arm isa') Source('nativetrace.cc', tags='arm isa') Source('pagetable.cc', tags='arm isa') Source('pauth_helpers.cc', tags='arm isa') @@ -132,7 +134,7 @@ SimObject('ArmCPU.py', sim_objects=[], tags='arm isa') DebugFlag('Arm', tags='arm isa') DebugFlag('ArmTme', 'Transactional Memory Extension', tags='arm isa') -DebugFlag('Semihosting', tags='arm isa') +DebugFlag('MPAM', 'MPAM debug flag', tags='arm isa') DebugFlag('PMUVerbose', "Performance Monitor", tags='arm isa') # Add files generated by the ISA description. 
diff --git a/src/arch/arm/fastmodel/remote_gdb.cc b/src/arch/arm/fastmodel/remote_gdb.cc index 555439ed75..0999f40ddd 100644 --- a/src/arch/arm/fastmodel/remote_gdb.cc +++ b/src/arch/arm/fastmodel/remote_gdb.cc @@ -63,7 +63,7 @@ FastmodelRemoteGDB::AArch64GdbRegCache::setRegs(ThreadContext *context) const FastmodelRemoteGDB::FastmodelRemoteGDB(System *_system, ListenSocketConfig _listen_config) - : gem5::ArmISA::RemoteGDB(_system, _listen_config) + : gem5::ArmISA::RemoteGDB(_system, _listen_config), regCache64(this) { } diff --git a/src/arch/arm/faults.cc b/src/arch/arm/faults.cc index 4b906f226f..4c5e2111f4 100644 --- a/src/arch/arm/faults.cc +++ b/src/arch/arm/faults.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2012-2014, 2016-2019, 2022 Arm Limited + * Copyright (c) 2010, 2012-2014, 2016-2019, 2022, 2024 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -44,6 +44,7 @@ #include "arch/arm/insts/static_inst.hh" #include "arch/arm/interrupts.hh" #include "arch/arm/isa.hh" +#include "arch/arm/regs/misc_accessors.hh" #include "arch/arm/self_debug.hh" #include "arch/arm/system.hh" #include "arch/arm/utility.hh" @@ -378,22 +379,6 @@ ArmFault::getSyndromeReg64() const } } -MiscRegIndex -ArmFault::getFaultAddrReg64() const -{ - switch (toEL) { - case EL1: - return MISCREG_FAR_EL1; - case EL2: - return MISCREG_FAR_EL2; - case EL3: - return MISCREG_FAR_EL3; - default: - panic("Invalid exception level"); - break; - } -} - void ArmFault::setSyndrome(ThreadContext *tc, MiscRegIndex syndrome_reg) { @@ -1113,13 +1098,15 @@ AbortFault::invoke(ThreadContext *tc, const StaticInstPtr &inst) if (stage2) { // stage 2 fault, set HPFAR_EL2 to the faulting IPA // and FAR_EL2 to the Original VA - tc->setMiscReg(AbortFault::getFaultAddrReg64(), OVAddr); + misc_regs::writeRegister( + tc, OVAddr, this->toEL); tc->setMiscReg(MISCREG_HPFAR_EL2, bits(faultAddr, 47, 12) << 4); DPRINTF(Faults, "Abort Fault (Stage 2) VA: 0x%x IPA: 
0x%x\n", OVAddr, faultAddr); } else { - tc->setMiscReg(AbortFault::getFaultAddrReg64(), faultAddr); + misc_regs::writeRegister( + tc, faultAddr, this->toEL); } } } @@ -1517,7 +1504,7 @@ PCAlignmentFault::invoke(ThreadContext *tc, const StaticInstPtr &inst) ArmFaultVals::invoke(tc, inst); assert(from64); // Set the FAR - tc->setMiscReg(getFaultAddrReg64(), faultPC); + misc_regs::writeRegister(tc, faultPC, toEL); } bool @@ -1661,8 +1648,7 @@ Watchpoint::invoke(ThreadContext *tc, const StaticInstPtr &inst) { ArmFaultVals::invoke(tc, inst); // Set the FAR - tc->setMiscReg(getFaultAddrReg64(), vAddr); - + misc_regs::writeRegister(tc, vAddr, toEL); } bool diff --git a/src/arch/arm/faults.hh b/src/arch/arm/faults.hh index ec60d629f5..d289ac2f40 100644 --- a/src/arch/arm/faults.hh +++ b/src/arch/arm/faults.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2012-2013, 2016-2019, 2022 Arm Limited + * Copyright (c) 2010, 2012-2013, 2016-2019, 2022, 2024 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -220,9 +220,6 @@ class ArmFault : public FaultBase // Returns the actual syndrome register to use based on the target // exception level MiscRegIndex getSyndromeReg64() const; - // Returns the actual fault address register to use based on the target - // exception level - MiscRegIndex getFaultAddrReg64() const; void invoke(ThreadContext *tc, const StaticInstPtr &inst = nullStaticInstPtr) override; diff --git a/src/arch/arm/insts/mem64.cc b/src/arch/arm/insts/mem64.cc index 7576a5c2af..ead4428ad6 100644 --- a/src/arch/arm/insts/mem64.cc +++ b/src/arch/arm/insts/mem64.cc @@ -61,8 +61,8 @@ SysDC64::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const uint32_t SysDC64::iss() const { - const MiscRegNum64 &misc_reg = encodeAArch64SysReg(dest); - return _iss(misc_reg, base); + const auto misc_reg = encodeAArch64SysReg(dest); + return _iss(misc_reg.value(), base); } void diff --git a/src/arch/arm/insts/misc.cc 
b/src/arch/arm/insts/misc.cc index 546d2caebb..78d489ec12 100644 --- a/src/arch/arm/insts/misc.cc +++ b/src/arch/arm/insts/misc.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2012-2013, 2017-2018, 2021 Arm Limited + * Copyright (c) 2010, 2012-2013, 2017-2018, 2021, 2023-2024 Arm Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -408,11 +408,20 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const switch (dest_idx) { case MISCREG_TLBIALL: // TLBI all entries, EL0&1, { + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIALL tlbiOp(EL1, secure); - tlbiOp(tc); + // Check for Force Broadcast. Ignored if HCR_EL2.TGE == 1 + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + TLBIALL tlbiOp(TranslationRegime::EL10, secure); + if (shareable) { + tlbiOp.broadcast(tc); + } else { + tlbiOp(tc); + } return; } // TLB Invalidate All, Inner Shareable @@ -421,58 +430,94 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIALL tlbiOp(EL1, secure); + TLBIALL tlbiOp(TranslationRegime::EL10, secure); tlbiOp.broadcast(tc); return; } // Instruction TLB Invalidate All case MISCREG_ITLBIALL: { + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - ITLBIALL tlbiOp(EL1, secure); - tlbiOp(tc); + // Check for Force Broadcast. 
Ignored if HCR_EL2.TGE == 1 + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + ITLBIALL tlbiOp(TranslationRegime::EL10, secure); + if (shareable) { + tlbiOp.broadcast(tc); + } else { + tlbiOp(tc); + } return; } // Data TLB Invalidate All case MISCREG_DTLBIALL: { + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - DTLBIALL tlbiOp(EL1, secure); - tlbiOp(tc); + // Check for Force Broadcast. Ignored if HCR_EL2.TGE == 1 + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + DTLBIALL tlbiOp(TranslationRegime::EL10, secure); + if (shareable) { + tlbiOp.broadcast(tc); + } else { + tlbiOp(tc); + } return; } // TLB Invalidate by VA case MISCREG_TLBIMVA: { + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVA tlbiOp(EL1, + // Check for Force Broadcast. Ignored if HCR_EL2.TGE == 1 + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + TLBIMVA tlbiOp(TranslationRegime::EL10, secure, mbits(value, 31, 12), bits(value, 7, 0), false); - tlbiOp(tc); + if (shareable) { + tlbiOp.broadcast(tc); + } else { + tlbiOp(tc); + } return; } // TLB Invalidate by VA, Last Level case MISCREG_TLBIMVAL: { + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVA tlbiOp(EL1, + // Check for Force Broadcast. 
Ignored if HCR_EL2.TGE == 1 + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + TLBIMVA tlbiOp(TranslationRegime::EL10, secure, mbits(value, 31, 12), bits(value, 7, 0), true); - tlbiOp(tc); + if (shareable) { + tlbiOp.broadcast(tc); + } else { + tlbiOp(tc); + } return; } // TLB Invalidate by VA, Inner Shareable @@ -481,7 +526,7 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVA tlbiOp(EL1, + TLBIMVA tlbiOp(TranslationRegime::EL10, secure, mbits(value, 31, 12), bits(value, 7, 0), @@ -496,7 +541,7 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVA tlbiOp(EL1, + TLBIMVA tlbiOp(TranslationRegime::EL10, secure, mbits(value, 31, 12), bits(value, 7, 0), @@ -508,14 +553,23 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const // TLB Invalidate by ASID match case MISCREG_TLBIASID: { + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIASID tlbiOp(EL1, + // Check for Force Broadcast. 
Ignored if HCR_EL2.TGE == 1 + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + TLBIASID tlbiOp(TranslationRegime::EL10, secure, bits(value, 7, 0)); - tlbiOp(tc); + if (shareable) { + tlbiOp.broadcast(tc); + } else { + tlbiOp(tc); + } return; } // TLB Invalidate by ASID match, Inner Shareable @@ -524,7 +578,7 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIASID tlbiOp(EL1, + TLBIASID tlbiOp(TranslationRegime::EL10, secure, bits(value, 7, 0)); @@ -534,25 +588,42 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const // TLB Invalidate by VA, All ASID case MISCREG_TLBIMVAA: { + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVAA tlbiOp(EL1, secure, + // Check for Force Broadcast. Ignored if HCR_EL2.TGE == 1 + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + TLBIMVAA tlbiOp(TranslationRegime::EL10, secure, mbits(value, 31, 12), false); - tlbiOp(tc); + if (shareable) { + tlbiOp.broadcast(tc); + } else { + tlbiOp(tc); + } return; } // TLB Invalidate by VA, Last Level, All ASID case MISCREG_TLBIMVAAL: { + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVAA tlbiOp(EL1, secure, + // Check for Force Broadcast. 
Ignored if HCR_EL2.TGE == 1 + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + TLBIMVAA tlbiOp(TranslationRegime::EL10, secure, mbits(value, 31, 12), true); - tlbiOp(tc); + if (shareable) { + tlbiOp.broadcast(tc); + } else { + tlbiOp(tc); + } return; } // TLB Invalidate by VA, All ASID, Inner Shareable @@ -561,7 +632,7 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVAA tlbiOp(EL1, secure, + TLBIMVAA tlbiOp(TranslationRegime::EL10, secure, mbits(value, 31, 12), false); tlbiOp.broadcast(tc); @@ -573,7 +644,7 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVAA tlbiOp(EL1, secure, + TLBIMVAA tlbiOp(TranslationRegime::EL10, secure, mbits(value, 31, 12), true); tlbiOp.broadcast(tc); @@ -585,7 +656,7 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVAA tlbiOp(EL2, secure, + TLBIMVAA tlbiOp(TranslationRegime::EL2, secure, mbits(value, 31, 12), false); tlbiOp(tc); @@ -597,7 +668,7 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVAA tlbiOp(EL2, secure, + TLBIMVAA tlbiOp(TranslationRegime::EL2, secure, mbits(value, 31, 12), true); tlbiOp(tc); @@ -609,7 +680,7 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVAA tlbiOp(EL2, secure, + TLBIMVAA tlbiOp(TranslationRegime::EL2, secure, mbits(value, 31, 12), false); 
tlbiOp.broadcast(tc); @@ -621,7 +692,7 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVAA tlbiOp(EL2, secure, + TLBIMVAA tlbiOp(TranslationRegime::EL2, secure, mbits(value, 31, 12), true); tlbiOp.broadcast(tc); @@ -633,7 +704,7 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIIPA tlbiOp(EL1, + TLBIIPA tlbiOp(TranslationRegime::EL10, secure, static_cast(bits(value, 35, 0)) << 12, false); @@ -648,7 +719,7 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIIPA tlbiOp(EL1, + TLBIIPA tlbiOp(TranslationRegime::EL10, secure, static_cast(bits(value, 35, 0)) << 12, true); @@ -663,7 +734,7 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIIPA tlbiOp(EL1, + TLBIIPA tlbiOp(TranslationRegime::EL10, secure, static_cast(bits(value, 35, 0)) << 12, false); @@ -678,7 +749,7 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIIPA tlbiOp(EL1, + TLBIIPA tlbiOp(TranslationRegime::EL10, secure, static_cast(bits(value, 35, 0)) << 12, true); @@ -689,82 +760,117 @@ TlbiOp::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const // Instruction TLB Invalidate by VA case MISCREG_ITLBIMVA: { + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - ITLBIMVA 
tlbiOp(EL1, + // Check for Force Broadcast. Ignored if HCR_EL2.TGE == 1 + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + ITLBIMVA tlbiOp(TranslationRegime::EL10, secure, mbits(value, 31, 12), bits(value, 7, 0)); - tlbiOp(tc); + if (shareable) { + tlbiOp.broadcast(tc); + } else { + tlbiOp(tc); + } return; } // Data TLB Invalidate by VA case MISCREG_DTLBIMVA: { + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - DTLBIMVA tlbiOp(EL1, + // Check for Force Broadcast. Ignored if HCR_EL2.TGE == 1 + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + DTLBIMVA tlbiOp(TranslationRegime::EL10, secure, mbits(value, 31, 12), bits(value, 7, 0)); - tlbiOp(tc); + if (shareable) { + tlbiOp.broadcast(tc); + } else { + tlbiOp(tc); + } return; } // Instruction TLB Invalidate by ASID match case MISCREG_ITLBIASID: { + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - ITLBIASID tlbiOp(EL1, + // Check for Force Broadcast. Ignored if HCR_EL2.TGE == 1 + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + ITLBIASID tlbiOp(TranslationRegime::EL10, secure, bits(value, 7, 0)); - tlbiOp(tc); + if (shareable) { + tlbiOp.broadcast(tc); + } else { + tlbiOp(tc); + } return; } // Data TLB Invalidate by ASID match case MISCREG_DTLBIASID: { + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - DTLBIASID tlbiOp(EL1, + // Check for Force Broadcast. 
Ignored if HCR_EL2.TGE == 1 + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + DTLBIASID tlbiOp(TranslationRegime::EL10, secure, bits(value, 7, 0)); - tlbiOp(tc); + if (shareable) { + tlbiOp.broadcast(tc); + } else { + tlbiOp(tc); + } return; } // TLB Invalidate All, Non-Secure Non-Hyp case MISCREG_TLBIALLNSNH: { - TLBIALLN tlbiOp(EL1); + TLBIALLN tlbiOp(TranslationRegime::EL10); tlbiOp(tc); return; } // TLB Invalidate All, Non-Secure Non-Hyp, Inner Shareable case MISCREG_TLBIALLNSNHIS: { - TLBIALLN tlbiOp(EL1); + TLBIALLN tlbiOp(TranslationRegime::EL10); tlbiOp.broadcast(tc); return; } // TLB Invalidate All, Hyp mode case MISCREG_TLBIALLH: { - TLBIALLN tlbiOp(EL2); + TLBIALLN tlbiOp(TranslationRegime::EL2); tlbiOp(tc); return; } // TLB Invalidate All, Hyp mode, Inner Shareable case MISCREG_TLBIALLHIS: { - TLBIALLN tlbiOp(EL2); + TLBIALLN tlbiOp(TranslationRegime::EL2); tlbiOp.broadcast(tc); return; } diff --git a/src/arch/arm/insts/misc64.cc b/src/arch/arm/insts/misc64.cc index 4919d92da8..a5ca423ea6 100644 --- a/src/arch/arm/insts/misc64.cc +++ b/src/arch/arm/insts/misc64.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2013,2017-2023 Arm Limited + * Copyright (c) 2011-2013,2017-2024 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -186,8 +186,9 @@ MiscRegRegImmOp64::generateDisassembly( uint32_t MiscRegRegImmOp64::iss() const { - const MiscRegNum64 &misc_reg = encodeAArch64SysReg(dest); - return _iss(misc_reg, op1); + const auto misc_reg = encodeAArch64SysReg(dest); + assert(misc_reg.has_value()); + return _iss(misc_reg.value(), op1); } std::string @@ -205,8 +206,9 @@ RegMiscRegImmOp64::generateDisassembly( uint32_t RegMiscRegImmOp64::iss() const { - const MiscRegNum64 &misc_reg = encodeAArch64SysReg(op1); - return _iss(misc_reg, dest); + const auto misc_reg = encodeAArch64SysReg(op1); + assert(misc_reg.has_value()); + return _iss(misc_reg.value(), dest); } Fault @@ 
-243,964 +245,1114 @@ RegNone::generateDisassembly( return ss.str(); } +void +TlbiOp64::tlbiAll(ThreadContext *tc, RegVal value, + bool secure, TranslationRegime regime, bool shareable) +{ + TLBIALLEL tlbi_op(regime, secure); + if (shareable) { + tlbi_op.broadcast(tc); + } else { + tlbi_op(tc); + } +} + +void +TlbiOp64::tlbiVmall(ThreadContext *tc, RegVal value, + bool secure, TranslationRegime regime, bool shareable, bool stage2) +{ + TLBIVMALL tlbi_op(regime, secure, stage2); + if (shareable) { + tlbi_op.broadcast(tc); + } else { + tlbi_op(tc); + } +} + +void +TlbiOp64::tlbiVa(ThreadContext *tc, RegVal value, + bool secure, TranslationRegime regime, bool shareable, bool last_level) +{ + if (MMU::hasUnprivRegime(regime)) { + // The asid will only be used when e2h == 1 + bool asid_16bits = ArmSystem::haveLargeAsid64(tc); + auto asid = asid_16bits ? bits(value, 63, 48) : + bits(value, 55, 48); + + TLBIMVA tlbi_op(regime, secure, static_cast(bits(value, 43, 0)) << 12, + asid, last_level); + if (shareable) { + tlbi_op.broadcast(tc); + } else { + tlbi_op(tc); + } + } else { + TLBIMVAA tlbi_op(regime, secure, static_cast(bits(value, 43, 0)) << 12, last_level); + if (shareable) { + tlbi_op.broadcast(tc); + } else { + tlbi_op(tc); + } + } +} + +void +TlbiOp64::tlbiVaa(ThreadContext *tc, RegVal value, + bool secure, TranslationRegime regime, bool shareable, bool last_level) +{ + TLBIMVAA tlbi_op(regime, secure, static_cast(bits(value, 43, 0)) << 12, last_level); + if (shareable) { + tlbi_op.broadcast(tc); + } else { + tlbi_op(tc); + } +} + +void +TlbiOp64::tlbiAsid(ThreadContext *tc, RegVal value, + bool secure, TranslationRegime regime, bool shareable) +{ + bool asid_16bits = ArmSystem::haveLargeAsid64(tc); + auto asid = asid_16bits ? 
bits(value, 63, 48) : + bits(value, 55, 48); + + TLBIASID tlbi_op(regime, secure, asid); + if (shareable) { + tlbi_op.broadcast(tc); + } else { + tlbi_op(tc); + } +} + +void +TlbiOp64::tlbiIpaS2(ThreadContext *tc, RegVal value, + bool secure, TranslationRegime regime, bool shareable, bool last_level) +{ + if (EL2Enabled(tc)) { + auto isa = static_cast(tc->getIsaPtr()); + auto release = isa->getRelease(); + + SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); + bool secure = release->has(ArmExtension::SECURITY) && + !scr.ns && !bits(value, 63); + + const int top_bit = ArmSystem::physAddrRange(tc) == 52 ? + 39 : 35; + TLBIIPA tlbi_op(TranslationRegime::EL10, secure, + static_cast(bits(value, top_bit, 0)) << 12, + last_level); + + if (shareable) { + tlbi_op.broadcast(tc); + } else { + tlbi_op(tc); + } + } +} + +void +TlbiOp64::tlbiRvaa(ThreadContext *tc, RegVal value, + bool secure, TranslationRegime regime, bool shareable, bool last_level) +{ + TLBIRMVAA tlbi_op(regime, secure, value, last_level); + if (shareable) { + tlbi_op.broadcast(tc); + } else { + tlbi_op(tc); + } +} + +void +TlbiOp64::tlbiRva(ThreadContext *tc, RegVal value, + bool secure, TranslationRegime regime, bool shareable, bool last_level) +{ + if (MMU::hasUnprivRegime(regime)) { + // The asid will only be used when e2h == 1 + bool asid_16bits = ArmSystem::haveLargeAsid64(tc); + auto asid = asid_16bits ? 
bits(value, 63, 48) : + bits(value, 55, 48); + + TLBIRMVA tlbi_op(regime, secure, value, asid, last_level); + if (shareable) { + tlbi_op.broadcast(tc); + } else { + tlbi_op(tc); + } + } else { + tlbiRvaa(tc, value, secure, regime, shareable, last_level); + } +} + +void +TlbiOp64::tlbiRipaS2(ThreadContext *tc, RegVal value, + bool secure, TranslationRegime regime, bool shareable, bool last_level) +{ + if (EL2Enabled(tc)) { + auto isa = static_cast(tc->getIsaPtr()); + auto release = isa->getRelease(); + SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); + bool secure = release->has(ArmExtension::SECURITY) && + !scr.ns && !bits(value, 63); + + TLBIRIPA tlbi_op(TranslationRegime::EL10, secure, value, last_level); + + if (shareable) { + tlbi_op.broadcast(tc); + } else { + tlbi_op(tc); + } + } +} + +std::unordered_map TlbiOp64::tlbiOps = { + { MISCREG_TLBI_ALLE3, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiAll(tc, value, + true, // secure + TranslationRegime::EL3, // regime + false); // shareable + } + }, + + { MISCREG_TLBI_ALLE3IS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiAll(tc, value, + true, // secure + TranslationRegime::EL3, // regime + true); // shareable + } + }, + + { MISCREG_TLBI_ALLE3OS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiAll(tc, value, + true, // secure + TranslationRegime::EL3, // regime + true); // shareable + } + }, + + { MISCREG_TLBI_ALLE2, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL2) ? + TranslationRegime::EL20 : TranslationRegime::EL2; + + TlbiOp64::tlbiAll(tc, value, + isSecureAtEL(tc, EL2), // secure + regime, // regime + false); // shareable + } + }, + + { MISCREG_TLBI_ALLE2IS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL2) ? 
+ TranslationRegime::EL20 : TranslationRegime::EL2; + + TlbiOp64::tlbiAll(tc, value, + isSecureAtEL(tc, EL2), // secure + regime, // regime + true); // shareable + } + }, + + { MISCREG_TLBI_ALLE2OS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL2) ? + TranslationRegime::EL20 : TranslationRegime::EL2; + + TlbiOp64::tlbiAll(tc, value, + isSecureAtEL(tc, EL2), // secure + regime, // regime + true); // shareable + } + }, + + { MISCREG_TLBI_ALLE1, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiAll(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + false); // shareable + } + }, + + { MISCREG_TLBI_ALLE1IS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiAll(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + true); // shareable + } + }, + + { MISCREG_TLBI_ALLE1OS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiAll(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + true); // shareable + } + }, + + { MISCREG_TLBI_VMALLE1, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + // Check for Force Broadcast. Ignored if HCR_EL2.TGE == 1 + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + TlbiOp64::tlbiVmall(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + shareable); // shareable + } + }, + + { MISCREG_TLBI_VMALLE1IS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? 
+ TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiVmall(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true); // shareable + } + }, + + { MISCREG_TLBI_VMALLE1OS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiVmall(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true); // shareable + } + }, + + { MISCREG_TLBI_VMALLS12E1, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiVmall(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + false, // shareable + true); // stage2 + } + }, + + { MISCREG_TLBI_VMALLS12E1IS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiVmall(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + true, // shareable + true); // stage2 + } + }, + + { MISCREG_TLBI_VMALLS12E1OS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiVmall(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + true, // shareable + true); // stage2 + } + }, + + { MISCREG_TLBI_VAE3, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiVa(tc, value, + true, // secure + TranslationRegime::EL3, // regime + false, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_VAE3IS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiVa(tc, value, + true, // secure + TranslationRegime::EL3, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_VAE3OS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiVa(tc, value, + true, // secure + TranslationRegime::EL3, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_VALE3, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiVa(tc, value, + true, // secure + TranslationRegime::EL3, // regime + false, // shareable + 
true); // last level only + } + }, + + { MISCREG_TLBI_VALE3IS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiVa(tc, value, + true, // secure + TranslationRegime::EL3, // regime + true, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_VALE3OS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiVa(tc, value, + true, // secure + TranslationRegime::EL3, // regime + true, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_VAE2, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL2) ? + TranslationRegime::EL20 : TranslationRegime::EL2; + + TlbiOp64::tlbiVa(tc, value, + isSecureAtEL(tc, EL2), // secure + regime, // regime + false, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_VAE2IS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiVa(tc, value, + isSecureAtEL(tc, EL2), // secure + TranslationRegime::EL2, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_VAE2OS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiVa(tc, value, + isSecureAtEL(tc,EL2), // secure + TranslationRegime::EL2, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_VALE2, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL2) ? + TranslationRegime::EL20 : TranslationRegime::EL2; + + TlbiOp64::tlbiVa(tc, value, + isSecureAtEL(tc, EL2), // secure + regime, // regime + false, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_VALE2IS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL2) ? + TranslationRegime::EL20 : TranslationRegime::EL2; + + TlbiOp64::tlbiVa(tc, value, + isSecureAtEL(tc, EL2), // secure + regime, // regime + true, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_VALE2OS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL2) ? 
+ TranslationRegime::EL20 : TranslationRegime::EL2; + + TlbiOp64::tlbiVa(tc, value, + isSecureAtEL(tc, EL2), // secure + regime, // regime + true, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_VAE1, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + // Check for Force Broadcast. Ignored if HCR_EL2.TGE == 1 + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + TlbiOp64::tlbiVa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + shareable, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_VAE1IS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiVa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_VAE1OS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiVa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_VALE1, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiVa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + false, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_VALE1IS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? 
+ TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiVa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_VALE1OS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiVa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_ASIDE1, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + // Check for Force Broadcast. Ignored if HCR_EL2.TGE == 1 + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + TlbiOp64::tlbiAsid(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + shareable); // shareable + } + }, + + { MISCREG_TLBI_ASIDE1IS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiAsid(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true); // shareable + } + }, + + { MISCREG_TLBI_ASIDE1OS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiAsid(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true); // shareable + } + }, + + { MISCREG_TLBI_VAAE1, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + // Check for Force Broadcast. 
Ignored if HCR_EL2.TGE == 1 + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + TlbiOp64::tlbiVaa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + shareable, // shareable + false); // last level + } + }, + + { MISCREG_TLBI_VAAE1IS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiVaa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + false); // last level + } + }, + + { MISCREG_TLBI_VAAE1OS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiVaa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + false); // last level + } + }, + + { MISCREG_TLBI_VAALE1, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + // Check for Force Broadcast. Ignored if HCR_EL2.TGE == 1 + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + TlbiOp64::tlbiVaa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + shareable, // shareable + true); // last level + } + }, + + { MISCREG_TLBI_VAALE1IS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiVaa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + true); // last level + } + }, + + { MISCREG_TLBI_VAALE1OS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? 
+ TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiVaa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + true); // last level + } + }, + + { MISCREG_TLBI_IPAS2E1, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiIpaS2(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + false, // shareable + false); // last level + } + }, + + { MISCREG_TLBI_IPAS2E1IS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiIpaS2(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + true, // shareable + false); // last level + } + }, + + { MISCREG_TLBI_IPAS2E1OS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiIpaS2(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + true, // shareable + false); // last level + } + }, + + { MISCREG_TLBI_IPAS2LE1, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiIpaS2(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + false, // shareable + true); // last level + } + }, + + { MISCREG_TLBI_IPAS2LE1IS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiIpaS2(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + true, // shareable + true); // last level + } + }, + + { MISCREG_TLBI_IPAS2LE1OS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiIpaS2(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + true, // shareable + true); // last level + } + }, + + { MISCREG_TLBI_RVAE1, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + // Check for Force Broadcast. 
Ignored if HCR_EL2.TGE == 1 + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + tlbiRva(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + shareable, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_RVAE1IS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + tlbiRva(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_RVAE1OS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + tlbiRva(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_RVAAE1, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + // Check for Force Broadcast. Ignored if HCR_EL2.TGE == 1 + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + TlbiOp64::tlbiRvaa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + shareable, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_RVAAE1IS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiRvaa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_RVAAE1OS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? 
+ TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiRvaa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_RVALE1, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + // Check for Force Broadcast. Ignored if HCR_EL2.TGE == 1 + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + tlbiRva(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + shareable, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_RVALE1IS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + tlbiRva(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_RVALE1OS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + tlbiRva(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_RVAALE1, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + // Check for Force Broadcast. 
Ignored if HCR_EL2.TGE == 1 + HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); + bool shareable = currEL(tc) == EL1 && EL2Enabled(tc) && + hcr.fb && !hcr.tge; + + TlbiOp64::tlbiRvaa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + shareable, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_RVAALE1IS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiRvaa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_RVAALE1OS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL0) ? + TranslationRegime::EL20 : TranslationRegime::EL10; + + TlbiOp64::tlbiRvaa(tc, value, + isSecureAtEL(tc, translationEl(regime)), // secure + regime, // regime + true, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_RIPAS2E1, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiRipaS2(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + false, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_RIPAS2E1IS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiRipaS2(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_RIPAS2E1OS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiRipaS2(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_RIPAS2LE1, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiRipaS2(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + false, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_RIPAS2LE1IS, 
[](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiRipaS2(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + true, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_RIPAS2LE1OS, [](ThreadContext *tc, RegVal value) + { + TlbiOp64::tlbiRipaS2(tc, value, + isSecureAtEL(tc, EL1), // secure + TranslationRegime::EL10, // regime + true, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_RVAE2, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL2) ? + TranslationRegime::EL20 : TranslationRegime::EL2; + + tlbiRva(tc, value, + isSecureAtEL(tc, EL2), // secure + regime, // regime + false, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_RVAE2IS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL2) ? + TranslationRegime::EL20 : TranslationRegime::EL2; + + tlbiRva(tc, value, + isSecureAtEL(tc, EL2), // secure + regime, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_RVAE2OS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL2) ? + TranslationRegime::EL20 : TranslationRegime::EL2; + + tlbiRva(tc, value, + isSecureAtEL(tc, EL2), // secure + regime, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_RVALE2, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL2) ? + TranslationRegime::EL20 : TranslationRegime::EL2; + + tlbiRva(tc, value, + isSecureAtEL(tc, EL2), // secure + regime, // regime + false, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_RVALE2IS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL2) ? 
+ TranslationRegime::EL20 : TranslationRegime::EL2; + + tlbiRva(tc, value, + isSecureAtEL(tc, EL2), // secure + regime, // regime + true, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_RVALE2OS, [](ThreadContext *tc, RegVal value) + { + const TranslationRegime regime = ELIsInHost(tc, EL2) ? + TranslationRegime::EL20 : TranslationRegime::EL2; + + tlbiRva(tc, value, + isSecureAtEL(tc, EL2), // secure + regime, // regime + true, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_RVAE3, [](ThreadContext *tc, RegVal value) + { + tlbiRva(tc, value, + isSecureAtEL(tc, EL3), // secure + TranslationRegime::EL3, // regime + false, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_RVAE3IS, [](ThreadContext *tc, RegVal value) + { + tlbiRva(tc, value, + isSecureAtEL(tc, EL3), // secure + TranslationRegime::EL3, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_RVAE3OS, [](ThreadContext *tc, RegVal value) + { + tlbiRva(tc, value, + isSecureAtEL(tc, EL3), // secure + TranslationRegime::EL3, // regime + true, // shareable + false); // last level only + } + }, + + { MISCREG_TLBI_RVALE3, [](ThreadContext *tc, RegVal value) + { + tlbiRva(tc, value, + isSecureAtEL(tc, EL3), // secure + TranslationRegime::EL3, // regime + false, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_RVALE3IS, [](ThreadContext *tc, RegVal value) + { + tlbiRva(tc, value, + isSecureAtEL(tc, EL3), // secure + TranslationRegime::EL3, // regime + true, // shareable + true); // last level only + } + }, + + { MISCREG_TLBI_RVALE3OS, [](ThreadContext *tc, RegVal value) + { + tlbiRva(tc, value, + isSecureAtEL(tc, EL3), // secure + TranslationRegime::EL3, // regime + true, // shareable + true); // last level only + } + }, +}; + void TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) const { ThreadContext* tc = xc->tcBase(); - auto isa = static_cast(tc->getIsaPtr()); - auto 
release = isa->getRelease(); - bool asid_16bits = ArmSystem::haveLargeAsid64(tc); - - switch (dest_idx) { - // AArch64 TLB Invalidate All, EL3 - case MISCREG_TLBI_ALLE3: - { - TLBIALLEL tlbiOp(EL3, true); - tlbiOp(tc); - return; - } - // AArch64 TLB Invalidate All, EL3, Inner Shareable - case MISCREG_TLBI_ALLE3IS: - // AArch64 TLB Invalidate All, EL3, Outer Shareable - // We are currently not distinguishing Inner and Outer domains. - // We therefore implement TLBIOS instructions as TLBIIS - case MISCREG_TLBI_ALLE3OS: - { - TLBIALLEL tlbiOp(EL3, true); - tlbiOp.broadcast(tc); - return; - } - // AArch64 TLB Invalidate All, EL2 - case MISCREG_TLBI_ALLE2: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIALLEL tlbiOp(EL2, secure); - tlbiOp(tc); - return; - } - // AArch64 TLB Invalidate All, EL2, Inner Shareable - case MISCREG_TLBI_ALLE2IS: - // AArch64 TLB Invalidate All, EL2, Outer Shareable - // We are currently not distinguishing Inner and Outer domains. - // We therefore implement TLBIOS instructions as TLBIIS - case MISCREG_TLBI_ALLE2OS: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIALLEL tlbiOp(EL2, secure); - tlbiOp.broadcast(tc); - return; - } - // AArch64 TLB Invalidate All, EL1 - case MISCREG_TLBI_ALLE1: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIALLEL tlbiOp(EL1, secure); - tlbiOp(tc); - return; - } - // AArch64 TLB Invalidate All, EL1, Inner Shareable - case MISCREG_TLBI_ALLE1IS: - // AArch64 TLB Invalidate All, EL1, Outer Shareable - // We are currently not distinguishing Inner and Outer domains. 
- // We therefore implement TLBIOS instructions as TLBIIS - case MISCREG_TLBI_ALLE1OS: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIALLEL tlbiOp(EL1, secure); - tlbiOp.broadcast(tc); - return; - } - case MISCREG_TLBI_VMALLS12E1: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIVMALL tlbiOp(EL1, secure, true); - tlbiOp(tc); - return; - } - case MISCREG_TLBI_VMALLE1: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIVMALL tlbiOp(target_el, secure, false); - tlbiOp(tc); - return; - } - case MISCREG_TLBI_VMALLS12E1IS: - // We are currently not distinguishing Inner and Outer domains. - // We therefore implement TLBIOS instructions as TLBIIS - case MISCREG_TLBI_VMALLS12E1OS: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIVMALL tlbiOp(EL1, secure, true); - tlbiOp.broadcast(tc); - return; - } - case MISCREG_TLBI_VMALLE1IS: - // We are currently not distinguishing Inner and Outer domains. 
- // We therefore implement TLBIOS instructions as TLBIIS - case MISCREG_TLBI_VMALLE1OS: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIVMALL tlbiOp(target_el, secure, false); - tlbiOp.broadcast(tc); - return; - } - // AArch64 TLB Invalidate by VA, EL3 - case MISCREG_TLBI_VAE3_Xt: - { - - TLBIMVAA tlbiOp(EL3, true, - static_cast(bits(value, 43, 0)) << 12, - false); - tlbiOp(tc); - return; - } - // AArch64 TLB Invalidate by VA, Last Level, EL3 - case MISCREG_TLBI_VALE3_Xt: - { - - TLBIMVAA tlbiOp(EL3, true, - static_cast(bits(value, 43, 0)) << 12, - true); - tlbiOp(tc); - return; - } - // AArch64 TLB Invalidate by VA, EL3, Inner Shareable - case MISCREG_TLBI_VAE3IS_Xt: - // AArch64 TLB Invalidate by VA, EL3, Outer Shareable - // We are currently not distinguishing Inner and Outer domains. - // We therefore implement TLBIOS instructions as TLBIIS - case MISCREG_TLBI_VAE3OS_Xt: - { - TLBIMVAA tlbiOp(EL3, true, - static_cast(bits(value, 43, 0)) << 12, - false); - - tlbiOp.broadcast(tc); - return; - } - // AArch64 TLB Invalidate by VA, Last Level, EL3, Inner Shareable - case MISCREG_TLBI_VALE3IS_Xt: - // AArch64 TLB Invalidate by VA, Last Level, EL3, Outer Shareable - // We are currently not distinguishing Inner and Outer domains. 
- // We therefore implement TLBIOS instructions as TLBIIS - case MISCREG_TLBI_VALE3OS_Xt: - { - TLBIMVAA tlbiOp(EL3, true, - static_cast(bits(value, 43, 0)) << 12, - true); - - tlbiOp.broadcast(tc); - return; - } - // AArch64 TLB Invalidate by VA, EL2 - case MISCREG_TLBI_VAE2_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - - if (hcr.e2h) { - // The asid will only be used when e2h == 1 - auto asid = asid_16bits ? bits(value, 63, 48) : - bits(value, 55, 48); - - TLBIMVA tlbiOp(EL2, secure, - static_cast(bits(value, 43, 0)) << 12, - asid, false); - tlbiOp(tc); - } else { - TLBIMVAA tlbiOp(EL2, secure, - static_cast(bits(value, 43, 0)) << 12, - false); - tlbiOp(tc); - } - return; - } - // AArch64 TLB Invalidate by VA, Last Level, EL2 - case MISCREG_TLBI_VALE2_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - - if (hcr.e2h) { - // The asid will only be used when e2h == 1 - auto asid = asid_16bits ? bits(value, 63, 48) : - bits(value, 55, 48); - - TLBIMVA tlbiOp(EL2, secure, - static_cast(bits(value, 43, 0)) << 12, - asid, true); - tlbiOp(tc); - } else { - TLBIMVAA tlbiOp(EL2, secure, - static_cast(bits(value, 43, 0)) << 12, - true); - tlbiOp(tc); - } - return; - } - // AArch64 TLB Invalidate by VA, EL2, Inner Shareable - case MISCREG_TLBI_VAE2IS_Xt: - // AArch64 TLB Invalidate by VA, EL2, Outer Shareable - // We are currently not distinguishing Inner and Outer domains. - // We therefore implement TLBIOS instructions as TLBIIS - case MISCREG_TLBI_VAE2OS_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - - if (hcr.e2h) { - // The asid will only be used when e2h == 1 - auto asid = asid_16bits ? 
bits(value, 63, 48) : - bits(value, 55, 48); - - TLBIMVA tlbiOp(EL2, secure, - static_cast(bits(value, 43, 0)) << 12, - asid, false); - tlbiOp.broadcast(tc); - } else { - TLBIMVAA tlbiOp(EL2, secure, - static_cast(bits(value, 43, 0)) << 12, - false); - tlbiOp.broadcast(tc); - } - return; - } - // AArch64 TLB Invalidate by VA, Last Level, EL2, Inner Shareable - case MISCREG_TLBI_VALE2IS_Xt: - // AArch64 TLB Invalidate by VA, Last Level, EL2, Outer Shareable - // We are currently not distinguishing Inner and Outer domains. - // We therefore implement TLBIOS instructions as TLBIIS - case MISCREG_TLBI_VALE2OS_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - - if (hcr.e2h) { - // The asid will only be used when e2h == 1 - auto asid = asid_16bits ? bits(value, 63, 48) : - bits(value, 55, 48); - - TLBIMVA tlbiOp(EL2, secure, - static_cast(bits(value, 43, 0)) << 12, - asid, true); - tlbiOp.broadcast(tc); - } else { - TLBIMVAA tlbiOp(EL2, secure, - static_cast(bits(value, 43, 0)) << 12, - true); - tlbiOp.broadcast(tc); - } - return; - } - // AArch64 TLB Invalidate by VA, EL1 - case MISCREG_TLBI_VAE1_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - auto asid = asid_16bits ? bits(value, 63, 48) : - bits(value, 55, 48); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVA tlbiOp(target_el, secure, - static_cast(bits(value, 43, 0)) << 12, - asid, false); - - tlbiOp(tc); - return; - } - // AArch64 TLB Invalidate by VA, Last Level, EL1 - case MISCREG_TLBI_VALE1_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - auto asid = asid_16bits ? 
bits(value, 63, 48) : - bits(value, 55, 48); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVA tlbiOp(target_el, secure, - static_cast(bits(value, 43, 0)) << 12, - asid, true); - - tlbiOp(tc); - return; - } - // AArch64 TLB Invalidate by VA, EL1, Inner Shareable - case MISCREG_TLBI_VAE1IS_Xt: - // AArch64 TLB Invalidate by VA, EL1, Outer Shareable - // We are currently not distinguishing Inner and Outer domains. - // We therefore implement TLBIOS instructions as TLBIIS - case MISCREG_TLBI_VAE1OS_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - auto asid = asid_16bits ? bits(value, 63, 48) : - bits(value, 55, 48); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVA tlbiOp(target_el, secure, - static_cast(bits(value, 43, 0)) << 12, - asid, false); - - tlbiOp.broadcast(tc); - return; - } - case MISCREG_TLBI_VALE1IS_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - auto asid = asid_16bits ? bits(value, 63, 48) : - bits(value, 55, 48); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVA tlbiOp(target_el, secure, - static_cast(bits(value, 43, 0)) << 12, - asid, true); - - tlbiOp.broadcast(tc); - return; - } - // AArch64 TLB Invalidate by ASID, EL1 - case MISCREG_TLBI_ASIDE1_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - auto asid = asid_16bits ? 
bits(value, 63, 48) : - bits(value, 55, 48); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIASID tlbiOp(target_el, secure, asid); - tlbiOp(tc); - return; - } - // AArch64 TLB Invalidate by ASID, EL1, Inner Shareable - case MISCREG_TLBI_ASIDE1IS_Xt: - // AArch64 TLB Invalidate by ASID, EL1, Outer Shareable - // We are currently not distinguishing Inner and Outer domains. - // We therefore implement TLBIOS instructions as TLBIIS - case MISCREG_TLBI_ASIDE1OS_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - auto asid = asid_16bits ? bits(value, 63, 48) : - bits(value, 55, 48); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIASID tlbiOp(target_el, secure, asid); - tlbiOp.broadcast(tc); - return; - } - // AArch64 TLB Invalidate by VA, All ASID, EL1 - case MISCREG_TLBI_VAAE1_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVAA tlbiOp(target_el, secure, - static_cast(bits(value, 43, 0)) << 12, - false); - - tlbiOp(tc); - return; - } - // AArch64 TLB Invalidate by VA, Last Level, All ASID, EL1 - case MISCREG_TLBI_VAALE1_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVAA tlbiOp(target_el, secure, - static_cast(bits(value, 43, 0)) 
<< 12, - true); - - tlbiOp(tc); - return; - } - // AArch64 TLB Invalidate by VA, All ASID, EL1, Inner Shareable - case MISCREG_TLBI_VAAE1IS_Xt: - // AArch64 TLB Invalidate by VA, All ASID, EL1, Outer Shareable - // We are currently not distinguishing Inner and Outer domains. - // We therefore implement TLBIOS instructions as TLBIIS - case MISCREG_TLBI_VAAE1OS_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVAA tlbiOp(target_el, secure, - static_cast(bits(value, 43, 0)) << 12, - false); - - tlbiOp.broadcast(tc); - return; - } - // AArch64 TLB Invalidate by VA, All ASID, - // Last Level, EL1, Inner Shareable - case MISCREG_TLBI_VAALE1IS_Xt: - // AArch64 TLB Invalidate by VA, All ASID, - // Last Level, EL1, Outer Shareable - // We are currently not distinguishing Inner and Outer domains. - // We therefore implement TLBIOS instructions as TLBIIS - case MISCREG_TLBI_VAALE1OS_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIMVAA tlbiOp(target_el, secure, - static_cast(bits(value, 43, 0)) << 12, - true); - - tlbiOp.broadcast(tc); - return; - } - // AArch64 TLB Invalidate by Intermediate Physical Address, - // Stage 2, EL1 - case MISCREG_TLBI_IPAS2E1_Xt: - { - if (EL2Enabled(tc)) { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - bool secure = release->has(ArmExtension::SECURITY) && - !scr.ns && !bits(value, 63); - - const int top_bit = ArmSystem::physAddrRange(tc) == 52 ? 
- 39 : 35; - TLBIIPA tlbiOp(EL1, secure, - static_cast(bits(value, top_bit, 0)) << 12, - false); - - tlbiOp(tc); - } - return; - } - // AArch64 TLB Invalidate by Intermediate Physical Address, - // Stage 2, Last Level EL1 - case MISCREG_TLBI_IPAS2LE1_Xt: - { - if (EL2Enabled(tc)) { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - bool secure = release->has(ArmExtension::SECURITY) && - !scr.ns && !bits(value, 63); - - TLBIIPA tlbiOp(EL1, secure, - static_cast(bits(value, 35, 0)) << 12, - true); - - tlbiOp(tc); - } - return; - } - // AArch64 TLB Invalidate by Intermediate Physical Address, - // Stage 2, EL1, Inner Shareable - case MISCREG_TLBI_IPAS2E1IS_Xt: - // AArch64 TLB Invalidate by Intermediate Physical Address, - // Stage 2, EL1, Outer Shareable - // We are currently not distinguishing Inner and Outer domains. - // We therefore implement TLBIOS instructions as TLBIIS - case MISCREG_TLBI_IPAS2E1OS_Xt: - { - if (EL2Enabled(tc)) { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - bool secure = release->has(ArmExtension::SECURITY) && - !scr.ns && !bits(value, 63); - - const int top_bit = ArmSystem::physAddrRange(tc) == 52 ? - 39 : 35; - TLBIIPA tlbiOp(EL1, secure, - static_cast(bits(value, top_bit, 0)) << 12, - false); - - tlbiOp.broadcast(tc); - } - return; - } - // AArch64 TLB Invalidate by Intermediate Physical Address, - // Stage 2, Last Level, EL1, Inner Shareable - case MISCREG_TLBI_IPAS2LE1IS_Xt: - // AArch64 TLB Invalidate by Intermediate Physical Address, - // Stage 2, Last Level, EL1, Outer Shareable - // We are currently not distinguishing Inner and Outer domains. 
- // We therefore implement TLBIOS instructions as TLBIIS - case MISCREG_TLBI_IPAS2LE1OS_Xt: - { - if (EL2Enabled(tc)) { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - bool secure = release->has(ArmExtension::SECURITY) && - !scr.ns && !bits(value, 63); - - TLBIIPA tlbiOp(EL1, secure, - static_cast(bits(value, 35, 0)) << 12, - true); - - tlbiOp.broadcast(tc); - } - return; - } - case MISCREG_TLBI_RVAE1_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - auto asid = asid_16bits ? bits(value, 63, 48) : - bits(value, 55, 48); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIRMVA tlbiOp(target_el, secure, value, asid, false); - - if (tlbiOp.valid()) - tlbiOp(tc); - return; - } - case MISCREG_TLBI_RVAE1IS_Xt: - case MISCREG_TLBI_RVAE1OS_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - auto asid = asid_16bits ? 
bits(value, 63, 48) : - bits(value, 55, 48); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIRMVA tlbiOp(target_el, secure, value, asid, false); - - if (tlbiOp.valid()) - tlbiOp.broadcast(tc); - return; - } - case MISCREG_TLBI_RVAAE1_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIRMVAA tlbiOp(target_el, secure, value, false); - - if (tlbiOp.valid()) - tlbiOp(tc); - return; - } - case MISCREG_TLBI_RVAAE1IS_Xt: - case MISCREG_TLBI_RVAAE1OS_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIRMVAA tlbiOp(target_el, secure, value, false); - - if (tlbiOp.valid()) - tlbiOp.broadcast(tc); - return; - } - case MISCREG_TLBI_RVALE1_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - auto asid = asid_16bits ? bits(value, 63, 48) : - bits(value, 55, 48); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIRMVA tlbiOp(target_el, secure, value, asid, true); - - if (tlbiOp.valid()) - tlbiOp(tc); - return; - } - case MISCREG_TLBI_RVALE1IS_Xt: - case MISCREG_TLBI_RVALE1OS_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - auto asid = asid_16bits ? 
bits(value, 63, 48) : - bits(value, 55, 48); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIRMVA tlbiOp(target_el, secure, value, asid, true); - - if (tlbiOp.valid()) - tlbiOp.broadcast(tc); - return; - } - case MISCREG_TLBI_RVAALE1_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIRMVAA tlbiOp(target_el, secure, value, true); - - if (tlbiOp.valid()) - tlbiOp(tc); - return; - } - case MISCREG_TLBI_RVAALE1IS_Xt: - case MISCREG_TLBI_RVAALE1OS_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - ExceptionLevel target_el = EL1; - if (EL2Enabled(tc)) { - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - if (hcr.tge && hcr.e2h) { - target_el = EL2; - } - } - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - TLBIRMVAA tlbiOp(target_el, secure, value, true); - - if (tlbiOp.valid()) - tlbiOp.broadcast(tc); - return; - } - case MISCREG_TLBI_RIPAS2E1_Xt: - { - if (EL2Enabled(tc)) { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - bool secure = release->has(ArmExtension::SECURITY) && - !scr.ns && !bits(value, 63); - - TLBIRIPA tlbiOp(EL1, secure, value, false); - - tlbiOp(tc); - } - return; - } - case MISCREG_TLBI_RIPAS2E1IS_Xt: - { - if (EL2Enabled(tc)) { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - bool secure = release->has(ArmExtension::SECURITY) && - !scr.ns && !bits(value, 63); - - TLBIRIPA tlbiOp(EL1, secure, value, false); - - tlbiOp.broadcast(tc); - } - return; - } - case MISCREG_TLBI_RIPAS2LE1_Xt: - { - if (EL2Enabled(tc)) { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - bool secure = release->has(ArmExtension::SECURITY) && 
- !scr.ns && !bits(value, 63); - - TLBIRIPA tlbiOp(EL1, secure, value, true); - - tlbiOp(tc); - } - return; - } - case MISCREG_TLBI_RIPAS2LE1IS_Xt: - { - if (EL2Enabled(tc)) { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - - bool secure = release->has(ArmExtension::SECURITY) && - !scr.ns && !bits(value, 63); - - TLBIRIPA tlbiOp(EL1, secure, value, true); - - tlbiOp.broadcast(tc); - } - return; - } - case MISCREG_TLBI_RVAE2_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - - if (hcr.e2h) { - // The asid will only be used when e2h == 1 - auto asid = asid_16bits ? bits(value, 63, 48) : - bits(value, 55, 48); - - TLBIRMVA tlbiOp(EL2, secure, value, asid, false); - - if (tlbiOp.valid()) - tlbiOp(tc); - } else { - TLBIRMVAA tlbiOp(EL2, secure, value, false); - - if (tlbiOp.valid()) - tlbiOp(tc); - } - return; - } - case MISCREG_TLBI_RVAE2IS_Xt: - case MISCREG_TLBI_RVAE2OS_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - - if (hcr.e2h) { - // The asid will only be used when e2h == 1 - auto asid = asid_16bits ? bits(value, 63, 48) : - bits(value, 55, 48); - - TLBIRMVA tlbiOp(EL2, secure, value, asid, false); - - if (tlbiOp.valid()) - tlbiOp.broadcast(tc); - } else { - TLBIRMVAA tlbiOp(EL2, secure, value, false); - - if (tlbiOp.valid()) - tlbiOp.broadcast(tc); - } - return; - } - case MISCREG_TLBI_RVALE2_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - - if (hcr.e2h) { - // The asid will only be used when e2h == 1 - auto asid = asid_16bits ? 
bits(value, 63, 48) : - bits(value, 55, 48); - - TLBIRMVA tlbiOp(EL2, secure, value, asid, true); - - if (tlbiOp.valid()) - tlbiOp(tc); - } else { - TLBIRMVAA tlbiOp(EL2, secure, value, true); - - if (tlbiOp.valid()) - tlbiOp(tc); - } - return; - } - case MISCREG_TLBI_RVALE2IS_Xt: - case MISCREG_TLBI_RVALE2OS_Xt: - { - SCR scr = tc->readMiscReg(MISCREG_SCR_EL3); - HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2); - - bool secure = release->has(ArmExtension::SECURITY) && !scr.ns; - - if (hcr.e2h) { - // The asid will only be used when e2h == 1 - auto asid = asid_16bits ? bits(value, 63, 48) : - bits(value, 55, 48); - - TLBIRMVA tlbiOp(EL2, secure, value, asid, true); - - if (tlbiOp.valid()) - tlbiOp.broadcast(tc); - } else { - TLBIRMVAA tlbiOp(EL2, secure, value, true); - - if (tlbiOp.valid()) - tlbiOp.broadcast(tc); - } - return; - } - case MISCREG_TLBI_RVAE3_Xt: - { - TLBIRMVAA tlbiOp(EL3, true, value, false); - if (tlbiOp.valid()) - tlbiOp(tc); - return; - } - case MISCREG_TLBI_RVAE3IS_Xt: - case MISCREG_TLBI_RVAE3OS_Xt: - { - TLBIRMVAA tlbiOp(EL3, true, value, false); - if (tlbiOp.valid()) - tlbiOp.broadcast(tc); - return; - } - case MISCREG_TLBI_RVALE3_Xt: - { - TLBIRMVAA tlbiOp(EL3, true, value, true); - if (tlbiOp.valid()) - tlbiOp(tc); - return; - } - case MISCREG_TLBI_RVALE3IS_Xt: - case MISCREG_TLBI_RVALE3OS_Xt: - { - TLBIRMVAA tlbiOp(EL3, true, value, true); - if (tlbiOp.valid()) - tlbiOp.broadcast(tc); - return; - } - default: + if (auto it = tlbiOps.find(dest_idx); it != tlbiOps.end()) { + it->second(tc, value); + } else { panic("Invalid TLBI\n"); } } diff --git a/src/arch/arm/insts/misc64.hh b/src/arch/arm/insts/misc64.hh index 3a67210b92..14ed41cb75 100644 --- a/src/arch/arm/insts/misc64.hh +++ b/src/arch/arm/insts/misc64.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2013,2017-2019, 2021-2022 Arm Limited + * Copyright (c) 2011-2013,2017-2019, 2021-2022, 2024 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and 
shall @@ -39,6 +39,7 @@ #define __ARCH_ARM_INSTS_MISC64_HH__ #include "arch/arm/insts/static_inst.hh" +#include "arch/arm/types.hh" namespace gem5 { @@ -283,6 +284,45 @@ class RegNone : public ArmISA::ArmStaticInst class TlbiOp64 : public MiscRegRegImmOp64 { + protected: + using TlbiFunc = std::function; + + static std::unordered_map tlbiOps; + + static void tlbiAll(ThreadContext *tc, RegVal value, + bool secure, ArmISA::TranslationRegime regime, bool shareable); + + static void tlbiVmall(ThreadContext *tc, RegVal value, + bool secure, ArmISA::TranslationRegime regime, bool shareable, + bool stage2=false); + + static void tlbiVa(ThreadContext *tc, RegVal value, + bool secure, ArmISA::TranslationRegime regime, bool shareable, + bool last_level); + + static void tlbiVaa(ThreadContext *tc, RegVal value, + bool secure, ArmISA::TranslationRegime regime, bool shareable, + bool last_level); + + static void tlbiAsid(ThreadContext *tc, RegVal value, + bool secure, ArmISA::TranslationRegime regime, bool shareable); + + static void tlbiIpaS2(ThreadContext *tc, RegVal value, + bool secure, ArmISA::TranslationRegime regime, bool shareable, + bool last_level); + + static void tlbiRvaa(ThreadContext *tc, RegVal value, + bool secure, ArmISA::TranslationRegime regime, bool shareable, + bool last_level); + + static void tlbiRva(ThreadContext *tc, RegVal value, + bool secure, ArmISA::TranslationRegime regime, bool shareable, + bool last_level); + + static void tlbiRipaS2(ThreadContext *tc, RegVal value, + bool secure, ArmISA::TranslationRegime regime, bool shareable, + bool last_level); + protected: TlbiOp64(const char *mnem, ArmISA::ExtMachInst _machInst, OpClass __opClass, ArmISA::MiscRegIndex _dest, diff --git a/src/arch/arm/insts/pseudo.cc b/src/arch/arm/insts/pseudo.cc index 3d017c1857..0402071255 100644 --- a/src/arch/arm/insts/pseudo.cc +++ b/src/arch/arm/insts/pseudo.cc @@ -116,6 +116,7 @@ FailUnimplemented::FailUnimplemented(const char *_mnemonic, // don't call execute() 
(which panics) if we're on a // speculative path flags[IsNonSpeculative] = true; + flags[IsInvalid] = true; } FailUnimplemented::FailUnimplemented(const char *_mnemonic, @@ -127,6 +128,7 @@ FailUnimplemented::FailUnimplemented(const char *_mnemonic, // don't call execute() (which panics) if we're on a // speculative path flags[IsNonSpeculative] = true; + flags[IsInvalid] = true; } Fault diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index f961a2d2c4..69ca95d306 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2023 Arm Limited + * Copyright (c) 2010-2024 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -44,6 +44,7 @@ #include "arch/arm/mmu.hh" #include "arch/arm/pmu.hh" #include "arch/arm/regs/misc.hh" +#include "arch/arm/regs/misc_accessors.hh" #include "arch/arm/self_debug.hh" #include "arch/arm/system.hh" #include "arch/arm/utility.hh" @@ -72,6 +73,8 @@ namespace gem5 namespace ArmISA { +using namespace misc_regs; + namespace { @@ -80,7 +83,7 @@ RegClass floatRegClass(FloatRegClass, FloatRegClassName, 0, debug::FloatRegs); } // anonymous namespace -ISA::ISA(const Params &p) : BaseISA(p), system(NULL), +ISA::ISA(const Params &p) : BaseISA(p, "arm"), system(NULL), _decoderFlavor(p.decoderFlavor), pmu(p.pmu), impdefAsNop(p.impdef_nop) { _regClasses.push_back(&flatIntRegClass); @@ -106,6 +109,7 @@ ISA::ISA(const Params &p) : BaseISA(p), system(NULL), // Cache system-level properties if (FullSystem && system) { highestELIs64 = system->highestELIs64(); + highestEL = system->highestEL(); haveLargeAsid64 = system->haveLargeAsid64(); physAddrRange = system->physAddrRange(); sveVL = system->sveVL(); @@ -114,6 +118,7 @@ ISA::ISA(const Params &p) : BaseISA(p), system(NULL), release = system->releaseFS(); } else { highestELIs64 = true; // ArmSystem::highestELIs64 does the same + highestEL = EL1; // ArmSystem::highestEL does the same haveLargeAsid64 = 
false; physAddrRange = 32; // dummy value sveVL = p.sve_vl_se; @@ -267,6 +272,8 @@ ISA::redirectRegVHE(int misc_reg) return currEL() == EL2 ? MISCREG_CONTEXTIDR_EL2 : misc_reg; case MISCREG_CNTKCTL_EL1: return currEL() == EL2 ? MISCREG_CNTHCTL_EL2 : misc_reg; + case MISCREG_MPAM1_EL1: + return currEL() == EL2 ? MISCREG_MPAM2_EL2 : misc_reg; case MISCREG_CNTP_TVAL: case MISCREG_CNTP_TVAL_EL0: if (ELIsInHost(tc, currEL())) { @@ -356,6 +363,8 @@ ISA::redirectRegVHE(int misc_reg) return MISCREG_CONTEXTIDR_EL1; case MISCREG_CNTKCTL_EL12: return MISCREG_CNTKCTL_EL1; + case MISCREG_MPAM1_EL12: + return MISCREG_MPAM1_EL1; // _EL02 registers case MISCREG_CNTP_TVAL_EL02: return MISCREG_CNTP_TVAL_EL0; @@ -600,6 +609,23 @@ ISA::readMiscReg(RegIndex idx) case MISCREG_HIFAR: // alias for secure IFAR return readMiscRegNoEffect(MISCREG_IFAR_S); + case MISCREG_MPAM1_EL1: + { + MPAM mpam1 = readMiscRegNoEffect(MISCREG_MPAM1_EL1); + mpam1.mpamEn = readRegisterNoEffect( + tc, highestEL).mpamEn; + mpam1.el1.forcedNs = isSecure(tc) ? 
+ readRegisterNoEffect(tc, EL3).el3.forceNs : 0; + return mpam1; + } + case MISCREG_MPAM2_EL2: + { + MPAM mpam2 = readMiscRegNoEffect(MISCREG_MPAM2_EL2); + mpam2.mpamEn = readRegisterNoEffect( + tc, highestEL).mpamEn; + return mpam2; + } + case MISCREG_RNDR: tc->setReg(cc_reg::Nz, (RegVal)0); tc->setReg(cc_reg::C, (RegVal)0); @@ -731,8 +757,8 @@ ISA::setMiscReg(RegIndex idx, RegVal val) const uint32_t ones = (uint32_t)(-1); CPACR cpacrMask = 0; - // Only cp10, cp11, and ase are implemented, nothing else should - // be writable + // Only cp10, cp11, and ase are implemented + // nothing else should be writable cpacrMask.cp10 = ones; cpacrMask.cp11 = ones; cpacrMask.asedis = ones; @@ -1541,6 +1567,8 @@ ISA::getCurSmeVecLenInBits() const void ISA::serialize(CheckpointOut &cp) const { + BaseISA::serialize(cp); + DPRINTF(Checkpoint, "Serializing Arm Misc Registers\n"); SERIALIZE_MAPPING(miscRegs, miscRegName, NUM_PHYS_MISCREGS); } diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh index 8ed37ba861..a60c887391 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2012-2023 Arm Limited + * Copyright (c) 2010, 2012-2024 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -90,6 +90,7 @@ namespace ArmISA // Cached copies of system-level properties bool highestELIs64; + ExceptionLevel highestEL; bool haveLargeAsid64; uint8_t physAddrRange; @@ -434,6 +435,8 @@ namespace ArmISA void globalClearExclusive() override; void globalClearExclusive(ExecContext *xc) override; + + int64_t getVectorLengthInBytes() const override { return sveVL * 16; } }; } // namespace ArmISA diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa index 30f9009121..246d1a7836 100644 --- a/src/arch/arm/isa/formats/aarch64.isa +++ b/src/arch/arm/isa/formats/aarch64.isa @@ -1,4 +1,4 @@ -// Copyright (c) 2011-2023 Arm Limited +// Copyright (c) 2011-2024 Arm Limited // 
All rights reserved // // The license below extends only to copyright in the software and shall @@ -531,29 +531,29 @@ namespace Aarch64 case MISCREG_TLBI_ALLE1: case MISCREG_TLBI_VMALLS12E1: case MISCREG_TLBI_VMALLE1: - case MISCREG_TLBI_VAE3_Xt: - case MISCREG_TLBI_VALE3_Xt: - case MISCREG_TLBI_VAE2_Xt: - case MISCREG_TLBI_VALE2_Xt: - case MISCREG_TLBI_VAE1_Xt: - case MISCREG_TLBI_VALE1_Xt: - case MISCREG_TLBI_ASIDE1_Xt: - case MISCREG_TLBI_VAAE1_Xt: - case MISCREG_TLBI_VAALE1_Xt: - case MISCREG_TLBI_IPAS2E1_Xt: - case MISCREG_TLBI_IPAS2LE1_Xt: - case MISCREG_TLBI_RVAE1_Xt: - case MISCREG_TLBI_RVAAE1_Xt: - case MISCREG_TLBI_RVALE1_Xt: - case MISCREG_TLBI_RVAALE1_Xt: - case MISCREG_TLBI_RIPAS2E1_Xt: - case MISCREG_TLBI_RIPAS2LE1_Xt: - case MISCREG_TLBI_RVAE2_Xt: - case MISCREG_TLBI_RVALE2_Xt: - case MISCREG_TLBI_RVAE3_Xt: - case MISCREG_TLBI_RVALE3_Xt: + case MISCREG_TLBI_VAE3: + case MISCREG_TLBI_VALE3: + case MISCREG_TLBI_VAE2: + case MISCREG_TLBI_VALE2: + case MISCREG_TLBI_VAE1: + case MISCREG_TLBI_VALE1: + case MISCREG_TLBI_ASIDE1: + case MISCREG_TLBI_VAAE1: + case MISCREG_TLBI_VAALE1: + case MISCREG_TLBI_IPAS2E1: + case MISCREG_TLBI_IPAS2LE1: + case MISCREG_TLBI_RVAE1: + case MISCREG_TLBI_RVAAE1: + case MISCREG_TLBI_RVALE1: + case MISCREG_TLBI_RVAALE1: + case MISCREG_TLBI_RIPAS2E1: + case MISCREG_TLBI_RIPAS2LE1: + case MISCREG_TLBI_RVAE2: + case MISCREG_TLBI_RVALE2: + case MISCREG_TLBI_RVAE3: + case MISCREG_TLBI_RVALE3: return new Tlbi64LocalHub( - machInst, miscReg, rt); + machInst, miscReg, rt, dec.dvmEnabled); case MISCREG_TLBI_ALLE3IS: case MISCREG_TLBI_ALLE3OS: case MISCREG_TLBI_ALLE2IS: @@ -564,48 +564,48 @@ namespace Aarch64 case MISCREG_TLBI_VMALLS12E1OS: case MISCREG_TLBI_VMALLE1IS: case MISCREG_TLBI_VMALLE1OS: - case MISCREG_TLBI_VAE3IS_Xt: - case MISCREG_TLBI_VAE3OS_Xt: - case MISCREG_TLBI_VALE3IS_Xt: - case MISCREG_TLBI_VALE3OS_Xt: - case MISCREG_TLBI_VAE2IS_Xt: - case MISCREG_TLBI_VAE2OS_Xt: - case MISCREG_TLBI_VALE2IS_Xt: - case 
MISCREG_TLBI_VALE2OS_Xt: - case MISCREG_TLBI_VAE1IS_Xt: - case MISCREG_TLBI_VAE1OS_Xt: - case MISCREG_TLBI_VALE1IS_Xt: - case MISCREG_TLBI_VALE1OS_Xt: - case MISCREG_TLBI_ASIDE1IS_Xt: - case MISCREG_TLBI_ASIDE1OS_Xt: - case MISCREG_TLBI_VAAE1IS_Xt: - case MISCREG_TLBI_VAAE1OS_Xt: - case MISCREG_TLBI_VAALE1IS_Xt: - case MISCREG_TLBI_VAALE1OS_Xt: - case MISCREG_TLBI_IPAS2E1IS_Xt: - case MISCREG_TLBI_IPAS2E1OS_Xt: - case MISCREG_TLBI_IPAS2LE1IS_Xt: - case MISCREG_TLBI_IPAS2LE1OS_Xt: - case MISCREG_TLBI_RVAE1IS_Xt: - case MISCREG_TLBI_RVAE1OS_Xt: - case MISCREG_TLBI_RVAAE1IS_Xt: - case MISCREG_TLBI_RVAAE1OS_Xt: - case MISCREG_TLBI_RVALE1IS_Xt: - case MISCREG_TLBI_RVALE1OS_Xt: - case MISCREG_TLBI_RVAALE1IS_Xt: - case MISCREG_TLBI_RVAALE1OS_Xt: - case MISCREG_TLBI_RIPAS2E1IS_Xt: - case MISCREG_TLBI_RIPAS2E1OS_Xt: - case MISCREG_TLBI_RIPAS2LE1IS_Xt: - case MISCREG_TLBI_RIPAS2LE1OS_Xt: - case MISCREG_TLBI_RVAE2IS_Xt: - case MISCREG_TLBI_RVAE2OS_Xt: - case MISCREG_TLBI_RVALE2IS_Xt: - case MISCREG_TLBI_RVALE2OS_Xt: - case MISCREG_TLBI_RVAE3IS_Xt: - case MISCREG_TLBI_RVAE3OS_Xt: - case MISCREG_TLBI_RVALE3IS_Xt: - case MISCREG_TLBI_RVALE3OS_Xt: + case MISCREG_TLBI_VAE3IS: + case MISCREG_TLBI_VAE3OS: + case MISCREG_TLBI_VALE3IS: + case MISCREG_TLBI_VALE3OS: + case MISCREG_TLBI_VAE2IS: + case MISCREG_TLBI_VAE2OS: + case MISCREG_TLBI_VALE2IS: + case MISCREG_TLBI_VALE2OS: + case MISCREG_TLBI_VAE1IS: + case MISCREG_TLBI_VAE1OS: + case MISCREG_TLBI_VALE1IS: + case MISCREG_TLBI_VALE1OS: + case MISCREG_TLBI_ASIDE1IS: + case MISCREG_TLBI_ASIDE1OS: + case MISCREG_TLBI_VAAE1IS: + case MISCREG_TLBI_VAAE1OS: + case MISCREG_TLBI_VAALE1IS: + case MISCREG_TLBI_VAALE1OS: + case MISCREG_TLBI_IPAS2E1IS: + case MISCREG_TLBI_IPAS2E1OS: + case MISCREG_TLBI_IPAS2LE1IS: + case MISCREG_TLBI_IPAS2LE1OS: + case MISCREG_TLBI_RVAE1IS: + case MISCREG_TLBI_RVAE1OS: + case MISCREG_TLBI_RVAAE1IS: + case MISCREG_TLBI_RVAAE1OS: + case MISCREG_TLBI_RVALE1IS: + case MISCREG_TLBI_RVALE1OS: + case 
MISCREG_TLBI_RVAALE1IS: + case MISCREG_TLBI_RVAALE1OS: + case MISCREG_TLBI_RIPAS2E1IS: + case MISCREG_TLBI_RIPAS2E1OS: + case MISCREG_TLBI_RIPAS2LE1IS: + case MISCREG_TLBI_RIPAS2LE1OS: + case MISCREG_TLBI_RVAE2IS: + case MISCREG_TLBI_RVAE2OS: + case MISCREG_TLBI_RVALE2IS: + case MISCREG_TLBI_RVALE2OS: + case MISCREG_TLBI_RVAE3IS: + case MISCREG_TLBI_RVAE3OS: + case MISCREG_TLBI_RVALE3IS: + case MISCREG_TLBI_RVALE3OS: return new Tlbi64ShareableHub( machInst, miscReg, rt, dec.dvmEnabled); default: diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa index 86c174d7c4..ae8465cecc 100644 --- a/src/arch/arm/isa/formats/sve_2nd_level.isa +++ b/src/arch/arm/isa/formats/sve_2nd_level.isa @@ -245,6 +245,65 @@ namespace Aarch64 return new Unknown64(machInst); } // decodeSveIntMulAdd + StaticInstPtr + decodeSveMultiplyAccIndexed(ExtMachInst machInst) + { + RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + uint8_t size = bits(machInst, 23, 22); + uint8_t opc = (bits(machInst, 10)); + + switch(size) { + case 0b00: + case 0b01: + { + + RegIndex zm_16 = (RegIndex)(uint8_t)bits(machInst, 18, 16); + uint8_t imm_16 = (uint8_t)(bits(machInst, 22) << 2) + | bits(machInst, 20, 19); + switch(opc) + { + case 0x0: return new Sve2Mlai( + machInst, zda, zn, zm_16, imm_16); + case 0x1: return new Sve2Mlsi( + machInst, zda, zn, zm_16, imm_16); + } + } + break; + + case 0b10: + { + + RegIndex zm_32 = (RegIndex)(uint8_t)bits(machInst, 18, 16); + uint8_t imm_32 = (uint8_t)bits(machInst, 20, 19); + switch(opc) { + case 0x0: return new Sve2Mlai( + machInst, zda, zn, zm_32, imm_32); + case 0x1: return new Sve2Mlsi( + machInst, zda, zn, zm_32, imm_32); + } + } + break; + + case 0b11: + { + + RegIndex zm_64 = (RegIndex)(uint8_t)bits(machInst, 19, 16); + uint8_t imm_64 = (uint8_t)bits(machInst, 20); + switch(opc) { + case 0x0: return new Sve2Mlai( + machInst, zda, zn, zm_64, imm_64); + 
case 0x1: return new Sve2Mlsi( + machInst, zda, zn, zm_64, imm_64); + } + } + break; + } + + return new Unknown64(machInst); + + } // decodeSveMultiplyAccIndexed + StaticInstPtr decodeSveIntMatMulAdd(ExtMachInst machInst) { @@ -509,6 +568,193 @@ namespace Aarch64 return new Unknown64(machInst); } // decodeSveIntArithUnpred + StaticInstPtr + decodeSveIntMulUnpred(ExtMachInst machInst) + { + RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); + uint8_t opc = bits(machInst, 11, 10); + uint8_t size = bits(machInst, 23, 22); + + switch (opc) { + case 0x1: + if (size == 0x0) { + return new SvePmul(machInst, zd, zn, zm); + } + [[fallthrough]]; + case 0x0: + // MUL (vectors, unpredicated) + case 0x2: + // SMULH (unpredicated) + case 0x3: + // UMULH (unpredicated) + default: + return new Unknown64(machInst); + } + + } // decodeSveIntMulUnpred + + StaticInstPtr + decodeSveIntTerUnpred(ExtMachInst machInst) + { + RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zk = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); + uint8_t opc = bits(machInst, 23, 22) << 1 | bits(machInst, 10); + + switch (opc) { + case 0x0: + return new SveEor3(machInst, zdn, zm, zk); + case 0x2: + return new SveBcax(machInst, zdn, zm, zk); + case 0x1: + // BSL + case 0x3: + // BSL1N + case 0x5: + // BSL2N + case 0x7: + // NBSL + default: + return new Unknown64(machInst); + } + } // decodeSveIntTerUnpred + + StaticInstPtr + decodeSve2IntMulLong(ExtMachInst machInst) + { + RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); + uint8_t opc_u_t = bits(machInst, 12, 10); + uint8_t size = bits(machInst, 23, 22); + + switch (opc_u_t) { + case 0x2: + return decodeSveBinUnpredS2( + size, machInst, 
zd, zn, zm); + case 0x3: + return decodeSveBinUnpredS2( + size, machInst, zd, zn, zm); + case 0x4: + return decodeSveBinUnpredSigned( + size, machInst, zd, zn, zm); + case 0x5: + return decodeSveBinUnpredSigned( + size, machInst, zd, zn, zm); + case 0x6: + return decodeSveBinUnpredUnsigned( + size, machInst, zd, zn, zm); + case 0x7: + return decodeSveBinUnpredUnsigned( + size, machInst, zd, zn, zm); + case 0x0: + // SQDMULLB + case 0x1: + // SQDMULLT + default: + return new Unknown64(machInst); + } + } // decodeSve2IntMulLong + + StaticInstPtr + decodeSve2BitPerm(ExtMachInst machInst) + { + RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); + uint8_t opc = bits(machInst, 11, 10); + uint8_t size = bits(machInst, 23, 22); + + switch (opc) { + case 0x2: + return decodeSveBinUnpredU( + size, machInst, zd, zn, zm); + case 0x0: + // BEXT + case 0x1: + // BDEP + default: + return new Unknown64(machInst); + } + } // decodeSve2BitPerm + + StaticInstPtr + decodeSveIntRotImm(ExtMachInst machInst) + { + RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 9, 5); + uint8_t imm3 = (RegIndex) (uint8_t) bits(machInst, 18, 16); + + uint8_t tsize = (bits(machInst, 23, 22) << 2) | bits(machInst, 20, 19); + uint8_t esize = 0; + uint8_t size = 0; + + if (tsize == 0x0) { + return new Unknown64(machInst); + } else if (tsize == 0x1) { + esize = 8; + } else if ((tsize & 0x0E) == 0x2) { + esize = 16; + size = 1; + } else if ((tsize & 0x0C) == 0x4) { + esize = 32; + size = 2; + } else if ((tsize & 0x08) == 0x8) { + esize = 64; + size = 3; + } + + unsigned rot_am = 2 * esize - ((tsize << 3) | imm3); + return decodeSveBinImmDestrUnpredU( + size, machInst, zdn, zm, rot_am); + } // decodeSveIntRotImm + + StaticInstPtr + decodeSve2CryptBinConstr(ExtMachInst machInst) + { + RegIndex zd = (RegIndex) (uint8_t) 
bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); + uint8_t size = bits(machInst, 23, 22); + uint8_t opc = bits(machInst, 10); + uint8_t size_opc = (size << 1) | opc; + + switch (size_opc) { + case 0x1: + return new SveRax1(machInst, zd, zn, zm); + case 0x0: + // SM4EKEY + default: + return new Unknown64(machInst); + } + } // decodeSve2CryptBinConstr + + StaticInstPtr + decodeSve2WideIntArith(ExtMachInst machInst) + { + uint8_t op0 = bits(machInst, 14, 13); + switch (op0) { + case 0b11: + return decodeSve2IntMulLong(machInst); + default: + return new Unknown64(machInst); + } + } + + StaticInstPtr + decodeSve2Crypto(ExtMachInst machInst) + { + uint8_t op2 = bits(machInst, 12, 11); + switch (op2) { + case 0b10: + return decodeSve2CryptBinConstr(machInst); + default: + return new Unknown64(machInst); + } + } + StaticInstPtr decodeSveIntLogUnpred(ExtMachInst machInst) { @@ -1041,12 +1287,19 @@ namespace Aarch64 decodeSvePermUnpred(ExtMachInst machInst) { uint8_t b12_10 = bits(machInst, 12, 10); - if (b12_10 == 0x4) { + if ((b12_10 == 0x4) || (bits(machInst, 12, 11) == 0x1)) { unsigned size = (unsigned) bits(machInst, 23, 22); RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0); RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); - return decodeSveBinUnpredU(size, machInst, zd, zn, zm); + if (b12_10 == 0x4) { // TBL, two sources + return decodeSveBinUnpredU(size, machInst, zd, zn, zm); + } else if (bits(machInst, 10) == 0x1) { // TBX + return decodeSveBinUnpredU(size, machInst, zd, zn, zm); + // } else { // TBL, three sources + // TBL, three sources + } + return new Unknown64(machInst); } else if (bits(machInst, 20, 16) == 0x0 && b12_10 == 0x6) { uint8_t size = bits(machInst, 23, 22); RegIndex rn = makeSP( @@ -1391,7 +1644,6 @@ namespace Aarch64 RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); RegIndex pg 
= (RegIndex) (uint8_t) bits(machInst, 13, 10); RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); - uint8_t size = bits(machInst, 23, 22); return decodeSveBinConstrPredU(size, @@ -3877,16 +4129,18 @@ namespace Aarch64 } // decodeSveMemStore StaticInstPtr - decodeSveMisc(ExtMachInst machInst) { + decodeSveMisc(ExtMachInst machInst) + { switch(bits(machInst, 13, 10)) { - case 0b0110: { - return decodeSveIntMatMulAdd(machInst); - break; - } - default: { - return new Unknown64(machInst); - break; - } + case 0b0110: + return decodeSveIntMatMulAdd(machInst); + case 0b1100: + case 0b1101: + case 0b1110: + case 0b1111: + return decodeSve2BitPerm(machInst); + default: + return new Unknown64(machInst); } return new Unknown64(machInst); } // decodeSveMisc @@ -3920,6 +4174,11 @@ namespace Aarch64 return decodeSveIntegerDotProductIndexed(machInst); case 0b11: return decodeSveMixedSignDotProductIndexed(machInst); + + // for mla/s indexed , can be renamed + case 0b01: + return decodeSveMultiplyAccIndexed(machInst); + default: return new Unknown64(machInst); } diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa index cb390eb972..9ae075ba2d 100644 --- a/src/arch/arm/isa/formats/sve_top_level.isa +++ b/src/arch/arm/isa/formats/sve_top_level.isa @@ -45,7 +45,9 @@ namespace Aarch64 StaticInstPtr decodeSveIntArithUnaryPred(ExtMachInst machInst); StaticInstPtr decodeSveIntMulAdd(ExtMachInst machInst); StaticInstPtr decodeSveIntMatMulAdd(ExtMachInst machInst); + StaticInstPtr decodeSveIntMulUnpred(ExtMachInst machInst); StaticInstPtr decodeSveIntArithUnpred(ExtMachInst machInst); + StaticInstPtr decodeSveIntTerUnpred(ExtMachInst machInst); StaticInstPtr decodeSveIntLogUnpred(ExtMachInst machInst); StaticInstPtr decodeSveIndexGen(ExtMachInst machInst); StaticInstPtr decodeSveStackAlloc(ExtMachInst machInst); @@ -71,6 +73,12 @@ namespace Aarch64 StaticInstPtr decodeSveIntWideImmUnpred(ExtMachInst machInst); StaticInstPtr 
decodeSveClamp(ExtMachInst machInst); StaticInstPtr decodeSve2Accum(ExtMachInst machInst); + StaticInstPtr decodeSveIntRotImm(ExtMachInst machInst); + StaticInstPtr decodeSve2CryptBinConstr(ExtMachInst machInst); + StaticInstPtr decodeSve2BitPerm(ExtMachInst machInst); + StaticInstPtr decodeSve2IntMulLong(ExtMachInst machInst); + StaticInstPtr decodeSve2WideIntArith(ExtMachInst machInst); + StaticInstPtr decodeSve2Crypto(ExtMachInst machInst); StaticInstPtr decodeSveIntegerDotProductUnpred(ExtMachInst machInst); StaticInstPtr decodeSveIntegerDotProductIndexed(ExtMachInst machInst); @@ -129,10 +137,14 @@ namespace Aarch64 break; case 0b10: case 0b11: - if (bits(machInst, 21) == 0b0 && op2 == 0b10) { + if (bits(machInst, 21) == 0b0 && bits(op2, 1) == 0b0) { + return decodeSve2WideIntArith(machInst); + } else if (bits(machInst, 21) == 0b0 && op2 == 0b10) { return decodeSveMisc(machInst); } else if (bits(machInst, 21) == 0b0 && op2 == 0b11) { return decodeSve2Accum(machInst); + } else if (bits(machInst, 21) == 0b1 && bits(machInst, 15, 13) == 0b111) { + return decodeSve2Crypto(machInst); } else { return new Unknown64(machInst); } @@ -180,7 +192,15 @@ namespace Aarch64 switch (b_15_14) { case 0x0: if (b_13) { - return decodeSveIntLogUnpred(machInst); + if (bits(machInst, 11)) { + return decodeSveIntTerUnpred(machInst); + } else { + if (bits(machInst, 10)) { + return decodeSveIntRotImm(machInst); + } else { + return decodeSveIntLogUnpred(machInst); + } + } } else { if (!bits(machInst, 30)) { return decodeSveIntArithUnpred(machInst); @@ -189,7 +209,7 @@ namespace Aarch64 break; case 0x1: if (b_13) { - return new Unknown64(machInst); + return decodeSveIntMulUnpred(machInst); } else if (b_12) { return decodeSveStackAlloc(machInst); } else { diff --git a/src/arch/arm/isa/insts/crypto.isa b/src/arch/arm/isa/insts/crypto.isa index b6c3ad3c20..5faa4b90d5 100644 --- a/src/arch/arm/isa/insts/crypto.isa +++ b/src/arch/arm/isa/insts/crypto.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- // 
-// Copyright (c) 2018 ARM Limited +// Copyright (c) 2018, 2024 Arm Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -38,11 +38,10 @@ let {{ cryptoEnabledCheckCode = ''' - auto crypto_reg = xc->tcBase()->readMiscReg(MISCREG_ID_ISAR5); - if (!(crypto_reg & %(mask)d)) { + if (!HaveExt(xc->tcBase(), %(extension)s)) { return std::make_shared(machInst, true); } - ''' + ''' + simdEnabledCheckCode header_output = "" decoder_output = "" @@ -150,7 +149,9 @@ let {{ sha256_su0Code = "crypto.sha256Su0(output, input);" sha256_su1Code = "crypto.sha256Su1(output, input, input2);" - aes_enabled = cryptoEnabledCheckCode % { "mask" : 0xF0 } + aes_enabled = cryptoEnabledCheckCode % { + "extension" : "ArmExtension::FEAT_AES" + } cryptoRegRegRegInst("aese", "AESE", "SimdAesOp", aes_enabled, aeseCode) cryptoRegRegRegInst("aesd", "AESD", "SimdAesOp", @@ -160,7 +161,9 @@ let {{ cryptoRegRegInst("aesimc", "AESIMC", "SimdAesMixOp", aes_enabled, aesimcCode) - sha1_enabled = cryptoEnabledCheckCode % { "mask" : 0xF00 } + sha1_enabled = cryptoEnabledCheckCode % { + "extension" : "ArmExtension::FEAT_SHA1" + } cryptoRegRegRegInst("sha1c", "SHA1C", "SimdSha1HashOp", sha1_enabled, sha1_cCode) cryptoRegRegRegInst("sha1p", "SHA1P", "SimdSha1HashOp", @@ -174,7 +177,9 @@ let {{ cryptoRegRegInst("sha1su1", "SHA1SU1", "SimdShaSigma2Op", sha1_enabled, sha1_su1Code) - sha2_enabled = cryptoEnabledCheckCode % { "mask" : 0xF000 } + sha2_enabled = cryptoEnabledCheckCode % { + "extension" : "ArmExtension::FEAT_SHA256" + } cryptoRegRegRegInst("sha256h", "SHA256H", "SimdSha256HashOp", sha2_enabled, sha256_hCode) cryptoRegRegRegInst("sha256h2", "SHA256H2", "SimdSha256Hash2Op", diff --git a/src/arch/arm/isa/insts/crypto64.isa b/src/arch/arm/isa/insts/crypto64.isa index 1ae580fa97..0ed0867136 100644 --- a/src/arch/arm/isa/insts/crypto64.isa +++ b/src/arch/arm/isa/insts/crypto64.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- // -// Copyright (c) 2018 ARM Limited 
+// Copyright (c) 2018, 2024 Arm Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -41,11 +41,11 @@ let {{ exec_output = "" cryptoEnabledCheckCode = ''' - auto crypto_reg = xc->tcBase()->readMiscReg(MISCREG_ID_AA64ISAR0_EL1); - if (!(crypto_reg & %(mask)d)) { + if (!HaveExt(xc->tcBase(), %(extension)s)) { return std::make_shared(machInst, true); } - ''' + ''' + simd64EnabledCheckCode + cryptoRegRegRegPrefix = ''' Crypto crypto; RegVect srcReg1, srcReg2, destReg; @@ -133,7 +133,9 @@ let {{ sha256_su0Code = "crypto.sha256Su0(output, input);" sha256_su1Code = "crypto.sha256Su1(output, input, input2);" - aes_enabled = cryptoEnabledCheckCode % { "mask" : 0xF0 } + aes_enabled = cryptoEnabledCheckCode % { + "extension" : "ArmExtension::FEAT_AES" + } cryptoRegRegRegInst("aese", "AESE64", "SimdAesOp", aes_enabled, aeseCode) cryptoRegRegRegInst("aesd", "AESD64", "SimdAesOp", @@ -143,7 +145,9 @@ let {{ cryptoRegRegInst("aesimc", "AESIMC64", "SimdAesMixOp", aes_enabled, aesimcCode) - sha1_enabled = cryptoEnabledCheckCode % { "mask" : 0xF00 } + sha1_enabled = cryptoEnabledCheckCode % { + "extension" : "ArmExtension::FEAT_SHA1" + } cryptoRegRegRegInst("sha1c", "SHA1C64", "SimdSha1HashOp", sha1_enabled, sha1_cCode) cryptoRegRegRegInst("sha1p", "SHA1P64", "SimdSha1HashOp", @@ -157,7 +161,9 @@ let {{ cryptoRegRegInst("sha1su1", "SHA1SU164", "SimdShaSigma2Op", sha1_enabled, sha1_su1Code) - sha2_enabled = cryptoEnabledCheckCode % { "mask" : 0xF000 } + sha2_enabled = cryptoEnabledCheckCode % { + "extension" : "ArmExtension::FEAT_SHA256" + } cryptoRegRegRegInst("sha256h", "SHA256H64", "SimdSha256HashOp", sha2_enabled, sha256_hCode) cryptoRegRegRegInst("sha256h2", "SHA256H264", "SimdSha256Hash2Op", diff --git a/src/arch/arm/isa/insts/data64.isa b/src/arch/arm/isa/insts/data64.isa index 87f87130ce..02ea53881d 100644 --- a/src/arch/arm/isa/insts/data64.isa +++ b/src/arch/arm/isa/insts/data64.isa @@ -1,6 +1,6 @@ // -*- mode:c++ 
-*- -// Copyright (c) 2011-2013, 2016-2023 Arm Limited +// Copyright (c) 2011-2013, 2016-2024 Arm Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -361,15 +361,8 @@ let {{ tlbiCode = msr_check_code + ''' performTlbi(xc, flat_idx, XOp1); ''' - msrTlbiIop = ArmInstObjParams("msr", "Tlbi64LocalHub", "TlbiOp64", - tlbiCode, - ["IsSerializeAfter", "IsNonSpeculative"]) - header_output += MiscRegRegOp64Declare.subst(msrTlbiIop) - decoder_output += MiscRegRegOp64Constructor.subst(msrTlbiIop) - exec_output += BasicExecute.subst(msrTlbiIop) - dvmCode = ''' - if (dvmEnabled) { + if (%(dvmCheck)s) { Request::Flags memAccessFlags = Request::STRICT_ORDER | Request::TLBI; @@ -378,9 +371,30 @@ let {{ PendingDvm = true; } ''' + msrTlbiIop = ArmInstObjParams("msr", "Tlbi64LocalHub", "TlbiOp64", + { + "code" : tlbiCode, + "dvm_code" : dvmCode % + { + "dvmCheck" : "HCR hcr = Hcr64; hcr.fb && dvmEnabled" + } + }, + ["IsSerializeAfter", "IsNonSpeculative"]) + header_output += DvmTlbiDeclare.subst(msrTlbiIop) + decoder_output += DvmTlbiConstructor.subst(msrTlbiIop) + exec_output += BasicExecute.subst(msrTlbiIop) + exec_output += DvmInitiateAcc.subst(msrTlbiIop) + exec_output += DvmCompleteAcc.subst(msrTlbiIop) + msrTlbiSIop = ArmInstObjParams("msr", "Tlbi64ShareableHub", "TlbiOp64", - { "code" : tlbiCode, "dvm_code" : dvmCode }, - ["IsSerializeAfter", "IsNonSpeculative"]) + { + "code" : tlbiCode, + "dvm_code" : dvmCode % + { + "dvmCheck" : "dvmEnabled" + } + }, + ["IsSerializeAfter", "IsNonSpeculative"]) header_output += DvmTlbiDeclare.subst(msrTlbiSIop) decoder_output += DvmTlbiConstructor.subst(msrTlbiSIop) exec_output += BasicExecute.subst(msrTlbiSIop) diff --git a/src/arch/arm/isa/insts/misc.isa b/src/arch/arm/isa/insts/misc.isa index 9ee753e385..35b310ecb9 100644 --- a/src/arch/arm/isa/insts/misc.isa +++ b/src/arch/arm/isa/insts/misc.isa @@ -848,7 +848,8 @@ let {{ ''' unknownIop = ArmInstObjParams("unknown", "Unknown", 
"UnknownOp", \ { "code": unknownCode, - "predicate_test": predicateTest }) + "predicate_test": predicateTest }, + ['IsInvalid']) header_output += BasicDeclare.subst(unknownIop) decoder_output += BasicConstructor.subst(unknownIop) exec_output += PredOpExecute.subst(unknownIop) diff --git a/src/arch/arm/isa/insts/misc64.isa b/src/arch/arm/isa/insts/misc64.isa index 5678195415..266467e9d8 100644 --- a/src/arch/arm/isa/insts/misc64.isa +++ b/src/arch/arm/isa/insts/misc64.isa @@ -183,7 +183,7 @@ let {{ return std::make_shared(machInst, true); ''' unknown64Iop = ArmInstObjParams("unknown", "Unknown64", "UnknownOp64", - unknownCode) + unknownCode, ['IsInvalid']) header_output += BasicDeclare.subst(unknown64Iop) decoder_output += BasicConstructor64.subst(unknown64Iop) exec_output += BasicExecute.subst(unknown64Iop) diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa index 9999843b59..148a31fdbc 100644 --- a/src/arch/arm/isa/insts/sve.isa +++ b/src/arch/arm/isa/insts/sve.isa @@ -325,6 +325,28 @@ output header {{ } } + + // Decodes binary with immediate operand, destructive, unpredicated + // SVE instructions, handling unsigned variants only. + template