diff --git a/src/arch/SConscript b/src/arch/SConscript index 226b50d715..534045e3a1 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -67,14 +67,17 @@ env.SwitchingHeaders( '''), env.subst('${TARGET_ISA}')) -if env['BUILD_GPU']: - env.SwitchingHeaders( - Split(''' - gpu_decoder.hh - gpu_isa.hh - gpu_types.hh - '''), - env.subst('${TARGET_GPU_ISA}')) +amdgpu_isa = ['gcn3'] + +env.SwitchingHeaders( + Split(''' + gpu_decoder.hh + gpu_isa.hh + gpu_registers.hh + gpu_types.hh + '''), + '{}'.format('amdgpu/' if env['TARGET_GPU_ISA'] in amdgpu_isa else '')+ + env.subst('${TARGET_GPU_ISA}')) ################################################################# # diff --git a/src/arch/gcn3/SConscript b/src/arch/amdgpu/gcn3/SConscript similarity index 100% rename from src/arch/gcn3/SConscript rename to src/arch/amdgpu/gcn3/SConscript diff --git a/src/arch/gcn3/SConsopts b/src/arch/amdgpu/gcn3/SConsopts similarity index 100% rename from src/arch/gcn3/SConsopts rename to src/arch/amdgpu/gcn3/SConsopts diff --git a/src/arch/amdgpu/gcn3/ast_interpreter.py b/src/arch/amdgpu/gcn3/ast_interpreter.py new file mode 100644 index 0000000000..99ddd3c103 --- /dev/null +++ b/src/arch/amdgpu/gcn3/ast_interpreter.py @@ -0,0 +1,2895 @@ +# Copyright (c) 2015-2021 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. 
Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +import copy +import os +import re +import sys + +from ast_objects import * +from description_objects import * +from description_parser import DescriptionParser, ParseError +from hand_coded import * + +from pprint import pprint, pformat + +# generate code using the gem5 style +class CodeGen(object): + def __init__(self, file): + self.file = file + self.line_max = 80 + self.semi_stack = '' + self.tab = ' ' + self.indent = '' + self.code = '' + + # write self.code out to file + def generate(self): + fd = open(self.file, 'w') + fd.write(self.code) + fd.close() + + # add an indent level + def inc_indent(self): + self.indent += self.tab + + # remove an indent level + def dec_indent(self): + self.indent = self.indent[0:-len(self.tab)] + + # add an optional semicolon after brace for matched cg_end() + def push_semi(self, semi): + if semi == '': + semi = ' ' + self.semi_stack += semi + + # used by cg_end to decide if a semicolon should follow the brace + def pop_semi(self): + semi = self.semi_stack[-1:] 
+ self.semi_stack = self.semi_stack[0:-1] + if semi == ' ': + semi = '' + return semi + + def smart_split(self, line): + max = self.line_max - len(self.indent) - len('\n') + if len(line) < max: + self.code += '%s%s\n' % (self.indent, line) + else: + pad = '' + while len(line) > max: + high = max + looking = True + while looking and high > 8: + low = high - 8 + for s in ['&&', '>=', '<<', '>>', ' *', ' +', ' =', + ' ?', ' :', ',']: + pos = line[low:high].find(s) + if pos > 0: + mid = low + pos + len(s) + self.code += '%s%s\n' % (self.indent, line[0:mid]) + line = self.tab + line[mid:].strip() + looking = False + break + high -= 4 + if looking: + import pdb; pdb.set_trace() + self.code += '%s%s\n' % (self.indent, line) + + # close a block with brace and optional semicolon + # }[;] + def cg_end(self, comment): + self.dec_indent() + self.code += self.indent + '}' + semi = self.pop_semi() + if semi: + self.code += semi + if comment: + self.code += ' // %s\n' % comment + + # class [: public ] + # [, public ] + # [, . . . ] + # [, public ] + # { + def cg_class(self, name, base): + self.code += self.indent + self.code += 'class ' + self.code += name + b0 = base[0:1] + if b0: + self.code += ' : public %s' % base[0] + if base[1:]: + spaces = self.indent + (' ' * (len(name) + 7)) + for b in base[1:]: + self.code += '\n%s, public %s' % (spaces, b) + self.code += '\n%s{\n' % self.indent + self.push_semi(';') + self.inc_indent() + + # : + def cg_scope(self, scope): + self.code += '%s%s\n' % (self.indent[0:-2], scope) + + # [] + # [::]([, , ... 
, ) qual + # { + def cg_method(self, typ, base, name, args, ini, qual=None): + if typ: + self.code += '%s%s\n' % (self.indent, typ) + if base: + line = '%s%s::%s(' % (self.indent, base, name) + else: + line = '%s%s(' % (self.indent, name) + if args: + left = self.line_max - len(line) + if (len(args[0]) + 2) < left: + line += args[0] + else: + self.code += '%s\n' % line + line = '%s %s' % (self.indent, args[0]) + for a in args[1:]: + left = self.line_max - len(line) + if (len(a) + 2) < left: + line += ', %s' % a + else: + self.code += '%s,\n' % line + line = '%s %s' % (self.indent, a) + if qual: + self.code += '%s) %s\n' % (line, qual) + else: + self.code += '%s)\n' % line + if ini: + separator = '%s : ' % self.indent + for i in ini: + self.code += '%s%s\n' % (separator, i) + separator = '%s , ' % self.indent + + self.code += '%s{\n' % self.indent + self.push_semi('') + self.inc_indent() + + # :: [] = { + # &::, + # &::, + # . . . + # &::, + # } + def cg_table(self, typ, base, name, entries): + self.code += '%s%s %s::%s[] = {\n' % (self.indent, typ, base, name) + spaces = self.indent + self.tab + for entry in entries: + self.code += '%s&%s::%s,\n' % (spaces, base, entry) + self.code += '%s};\n\n' % self.indent + + # union { + def cg_union(self, name): + self.code += '%sunion %s {\n' % (self.indent, name) + self.push_semi(';') + self.inc_indent() + + # struct { + # unsigned int : ; + # . . . 
+ # } ; + # + def cg_struct(self, name, fields): + max = len('pad_00_31') + for f in fields: + sz = len(f[0]) + if sz > max: + max = sz + bit = 0 + self.code += '%sstruct %s {\n' % (self.indent, name) + prefix = '%s%sunsigned int ' % (self.indent, self.tab) + for f in fields: + n = f[0] + v = f[1] + m = f[2] + assert v >= bit, 'fields not in sorted order' + if bit < v: + sz = v - bit + p = 'pad_' + str(bit) + if sz > 1: + p += '_' + str(v - 1) + pad = ' ' * ((max - len(p))) + self.code += '%s%s%s : %d;\n' % (prefix, pad, p, sz) + bit += sz + sz = 1 + m - v + pad = ' ' * (max - len(n)) + self.code += '%s%s%s : %d;\n' % (prefix, pad, n, sz) + bit += sz + self.code += '%s};\n' % self.indent + + # for (; ; ) { + def cg_for(self, init, test, fini): + line = '%sfor (%s;' % (self.indent, init) + left = self.line_max - len(line) + if (len(test) + 2) > left: + self.code += line + line = self.indent + self.tab + left = self.line_max - len(line) + line += ' %s;' % test + if (len(fini) + 2) > left: + self.code += line + line = self.indent + self.tab + left = self.line_max - len(line) + line += ' %s) {\n' % fini + self.code += line + self.inc_indent() + + # if () { + def cg_if(self, test): + self.code += '%sif (%s) {\n' % (self.indent, test) + self.push_semi('') + self.inc_indent() + + # } else if () { + def cg_else_if(self, test): + self.dec_indent() + self.code += '%s} else if (%s) {\n' % (self.indent, test) + self.inc_indent() + + # } else { + def cg_else(self): + self.dec_indent() + self.code += '%s} else {\n' % self.indent + self.inc_indent() + + # + def cg_code(self, code): + self.code += '%s%s\n' % (self.indent, code) + + # + def cg_block(self, lines): + for line in lines: + self.smart_split(line) + + # #include + def cg_include(self, file): + if file[0] == '<': + self.code += '#include %s\n' % file + else: + self.code += '#include "%s"\n' % file + + # namespace + # { + def cg_namespace(self, namespace): + self.code += '%snamespace %s\n' % (self.indent, namespace) + 
self.code += '%s{\n' % (self.indent) + self.inc_indent() + + # // + def cg_comment(self, comment): + self.code += '%s// %s\n' % (self.indent, comment) + + # gem5 fatal() call + def cg_fatal(self, fatal_cause): + self.code += '%sfatal("%s");\n' % (self.indent, fatal_cause) + + # blank line + def cg_newline(self): + self.code += '\n' + +# CodeFrag is used to build up the code for one data reference +class CodeFrag(object): + def __init__(self, k): + self.key = k + self.ctx = '' # context accessor fragment + self.fld = '' # field reference fragment + self.var = '' # variable name fragment + self.typ = '' # variable type fragment + self.exp = '' # general expression fragment + self.vec = '' # vector index fragment + self.scn = '' # section selector + + def setup(self, initList): + assert type(initList) is list + assert len(initList) == 6 + self.ctx = initList[0] + self.fld = initList[1] + self.var = initList[2] + self.typ = initList[3] + self.exp = initList[4] + self.vec = initList[5] + self.scn = '' + + def __repr__(self): + text = '"key":\t' + repr(self.key) + ',\n' + text += '"ctx":\t' + repr(self.ctx) + ',\n' + text += '"fld":\t' + repr(self.fld) + ',\n' + text += '"var":\t' + repr(self.var) + ',\n' + text += '"typ":\t' + repr(self.typ) + ',\n' + text += '"exp":\t' + repr(self.exp) + ',\n' + text += '"vec":\t' + repr(self.vec) + ',\n' + text += '"scn":\t' + repr(self.scn) + ',\n' + return text + +# GenOne is used to build up the code for one method +class GenOne(object): + def __init__(self, op_inst, cg, is_vec, methods, info): + assert type(cg) is CodeGen + self.op_inst = op_inst + self.cg = cg + self.decl = [] + self.vector = [] + self.scalar = [] + self.store = [] + self.modified = [] + self.tab = ' ' + self.indent = '' + self.store_vars = [] + self.mem_vars = {} + self.info = info + self.mem_load = [] + if is_vec: + self.math = self.vector + self.decl = ['SregU64 exec;'] + #self.load = ['exec = readSpecialReg(gpuDynInst, ' + # 'REG_EXEC);'] + self.load = ['exec 
= ' + 'gpuDynInst->wavefront()->execMask().to_ullong();'] + self.load_vars = ['exec'] + else: + self.math = self.scalar + self.load = [] + self.load_vars = [] + self.methods = methods + + # change scalar v vector mode + def set_vector(self, is_vec): + if is_vec: + self.math = self.vector + else: + self.math = self.scalar + + # add an indent level + def inc_indent(self): + self.indent += self.tab + + # remove an indent level + def dec_indent(self): + self.indent = self.indent[0:-len(self.tab)] + + # add to the scalar or vector code + def add_math(self, stmt): + while True: + m = re.search('(vmem_\$([^$]+)\$)', stmt) + if m is None: + break; + stmt = stmt.replace(m.group(1), self.mem_vars[m.group(2)]) + self.math.append(self.indent + stmt) + + # add to the scalar code + def add_scalar(self, stmt): + self.scalar.append(self.indent + stmt) + + def decl_type(self, var): + assert type(var) is str + for dcl in self.decl: + if var in dcl: + pos = dcl.find(' ') + return dcl[0:pos] + return 'SregU16' + + def add_decl(self, var, dcl): + for d in self.decl: + if var in d: + return + self.decl.append(dcl) + + def add_src_set(self, src_set): + for s in src_set: + assert type(s) is CodeFrag + if s.scn == 'mem': + if s.exp not in self.mem_vars.keys(): + self.mem_vars[s.exp] = 'vmem_%d' % len(self.mem_vars) + s.var = self.mem_vars[s.exp] + if s.var in self.load_vars: + continue + self.load_vars.append(s.var) + if s.vec != '': + vreg = s.typ.replace('S', 'V') + src_dec = '%s %s;' % (vreg, s.var) + else: + sreg = s.typ + src_dec = '%s %s;' % (sreg, s.var) + self.add_decl(s.var, src_dec) + if s.var not in self.modified: + shift_fld = False + # fixup SBASE, which needs to be shifted left by 1 bit + if s.fld == 'instData.SBASE': + shift_fld = True + if s.scn == 'mem': + if s.ctx != 'MEM': + import pdb; pdb.set_trace() + if '+' in s.exp: + pos = s.exp.find('+') + 1 + off = s.exp[pos:].strip() + string = 'calculateAddr<%s>(gpuDynInst, '\ + '%s, (%s).get(), 0);' + params = 
(self.decl_type(s.fld), s.fld, off) + typ1 = self.decl_type(s.var) + typ2 = self.decl_type(s.fld) + typ3 = self.decl_type(off) + elif ',' in s.exp: + args = s.exp.split(',') + if len(args) == 3: + a1 = args[1].strip() + a2 = args[2].strip() + string = 'calculateAddr<%s>(gpuDynInst, %s, '\ + '(%s).get(), (%s).get());' + params = (self.decl_type(s.fld), s.fld, a1, a2) + typ1 = self.decl_type(s.var) + typ2 = self.decl_type(s.fld) + typ3 = self.decl_type(a1) + typ4 = self.decl_type(a2) + else: + import pdb; pdb.set_trace() + else: + if 'SMEM' in self.op_inst: + string = 'calculateAddr<%s>(gpuDynInst, %s, '\ + 'offset.get());' + elif 'FLAT' in self.op_inst: + string = 'calculateAddr<%s>(gpuDynInst, %s);' + else: + string = 'calculateAddr<%s>(gpuDynInst, %s, 0, 0);' + params = (self.decl_type(s.fld), s.fld) + typ1 = self.decl_type(s.var) + typ2 = self.decl_type(s.fld) + mem_read_str = 'initiateMemRead<%s>(gpuDynInst, %s);' + mem_read_params = (self.decl_type(s.var), s.var) + self.mem_load.append(mem_read_str % mem_read_params) + elif s.ctx != '' and s.fld != '': + if shift_fld: + string = '%s = read%s<%s>(gpuDynInst, %s << 1);' + else: + string = '%s = read%s<%s>(gpuDynInst, %s);' + if s.ctx in SpecialCtx: + params = (s.var, SpecialCtx[s.ctx], + self.decl_type(s.var), s.fld) + else: + params = (s.var, + TypeToAccessMethod[self.decl_type(s.var)], + self.decl_type(s.var), s.fld) + typ1 = self.decl_type(s.var) + typ2 = 'uint32_t' + elif s.exp != '': + if shift_fld: + string = '%s = read%s<%s>(gpuDynInst, %s << 1);' + else: + string = '%s = read%s<%s>(gpuDynInst, %s);' + if s.ctx in SpecialCtx: + params = (s.var, SpecialCtx[s.ctx], + self.decl_type(s.var), s.exp) + else: + params = (s.var, + TypeToAccessMethod[self.decl_type(s.var)], + self.decl_type(s.var), s.exp) + typ1 = self.decl_type(s.var) + typ2 = self.decl_type(s.exp) + elif s.ctx != '': + string = '%s = read%s<%s>(gpuDynInst, REG_%s);' + if s.ctx in SpecialCtx: + params = (s.var, SpecialCtx[s.ctx], + 
self.decl_type(s.var), s.ctx) + else: + params = (s.var, + TypeToAccessMethod[self.decl_type(s.var)], + self.decl_type(s.var)) + typ1 = self.decl_type(s.var) + elif s.fld != '': + string = '%s = %s;' + params = (s.var, s.fld) + else: + continue + self.load.append(string % params) + + def add_dst_set(self, dst_set): + src_set = [] + for d in dst_set: + assert type(d) is CodeFrag + if d.key == 'src': + src_set.append(d) + if src_set: + self.add_src_set(src_set) + + for d in dst_set: + assert type(d) is CodeFrag + if d.key == 'dst': + if d.scn == 'mem': + if d.exp not in self.mem_vars.keys(): + self.mem_vars[d.exp] = 'vmem_%d' % len(self.mem_vars) + d.var = self.mem_vars[d.exp] + # handle destination declarations + if d.var in self.store_vars: + continue + self.store_vars.append(d.var) + if d.vec != '': + vreg = d.typ.replace('S', 'V') + dst_dec = '%s %s;' % (vreg, d.var) + # vector destination regs are also source regs + src_set.append(d) + else: + sreg = d.typ + dst_dec = '%s %s;' % (sreg, d.var) + self.add_decl(d.var, dst_dec) + + # write back destinations + if d.scn == 'mem': + if d.ctx != 'MEM': + import pdb; pdb.set_trace() + if '+' in d.exp: + pos = d.exp.find('+') + 1 + off = d.exp[pos:].strip() + string = 'writeMem<%s>(gpuDynInst, %s, (%s)'\ + '.get(), %s);' + params = (self.decl_type(d.var), d.fld, off, d.var) + typ1 = self.decl_type(d.var) + typ2 = self.decl_type(d.fld) + typ3 = self.decl_type(off) + else: + string = 'writeMem<%s, %s>(gpuDynInst, %s, 0, %s);' + params = (self.decl_type(d.var), self.decl_type(d.fld), + d.fld, d.var) + typ1 = self.decl_type(d.var) + typ2 = self.decl_type(d.fld) + elif d.ctx != '' and d.fld != '': + if d.fld[0] == '-': + string = 'write%s<%s>(gpuDynInst, REG_%s, %s);' + if d.ctx in SpecialCtx: + params = (SpecialCtx[d.ctx], self.decl_type(d.var), + d.ctx, d.var) + else: + params = (d.fld[1:], self.decl_type(d.var), d.var) + typ1 = self.decl_type(d.var) + else: + string = 'write%s<%s>(gpuDynInst, %s, %s);' + if d.ctx in 
SpecialCtx: + params = (SpecialCtx[d.ctx], self.decl_type(d.var), + d.fld, d.var) + else: + params = + (TypeToAccessMethod[self.decl_type(d.var)], + self.decl_type(d.var), d.fld, d.var) + typ1 = self.decl_type(d.var) + typ2 = 'uint32_t' + elif d.exp != '': + string = 'write%s<%s>(gpuDynInst, %s, %s);' + if d.ctx in SpecialCtx: + params = (SpecialCtx[d.ctx], self.decl_type(d.var), + d.exp, d.var) + else: + params = (d.ctx, self.decl_type(d.var), d.exp, d.var) + typ1 = self.decl_type(d.var) + typ2 = self.decl_type(d.exp) + elif d.ctx != '': + string = 'write%s<%s>(gpuDynInst, REG_%s, %s);' + if d.ctx in SpecialCtx: + params = (SpecialCtx[d.ctx], self.decl_type(d.var), + d.ctx, d.var) + else: + params = (d.ctx, self.decl_type(d.var), d.var) + typ1 = self.decl_type(d.var) + else: + continue + self.store.append(string % params) + + # once all he code for a single execute() method is built up + # the C++ method is generated. memory ops are a special case + # because of the 3-phase nature of their execution. 
to fully + # execute a memory operation the instruction class must + # implement the following methods: + # + # execute() - issues the reqest to the proper memory pipe, + # i.e., global/local + # + # initiateAcc() - builds a memory request/packet and sends the + # req to memory + # + # completeAcc() - returned data are written back to the + # register file + def finish(self): + self.info.decl = self.decl + + is_store = False + is_load = False + is_atomic = False + + if ('OPF_MEM_STORE' in self.info.flags or 'DS_WRITE_B32' in + self.op_inst or 'DS_WRITE_B64' in self.op_inst): + is_store = True + elif 'OPF_MEM_ATOMIC' in self.info.flags: + is_atomic = True + elif ('LOAD' in self.op_inst or 'DS_READ_B32' in self.op_inst or + 'DS_READ_B64' in self.op_inst): + is_load = True + + if is_store or is_load: + is_smem = 'SMEM' in self.op_inst + is_flat_mem = 'FLAT' in self.op_inst + is_ds_mem = 'DS' in self.op_inst + + self.cg.cg_code('Wavefront *wf = gpuDynInst->wavefront();') + #if not is_flat_mem: + self.cg.cg_code('gpuDynInst->execUnitId = wf->execUnitId;') + if not is_smem: + self.cg.cg_code('gpuDynInst->exec_mask = ' + 'gpuDynInst->wavefront()->execMask();') + self.cg.cg_code('gpuDynInst->latency.init(&gpuDynInst'\ + '->computeUnit()->shader->tick_cnt);') + if is_ds_mem: + self.cg.cg_code('gpuDynInst->latency.set(gpuDynInst'\ + '->computeUnit()->cyclesToTicks(Cycles(24)));') + else: + self.cg.cg_code('gpuDynInst->latency.set(gpuDynInst'\ + '->computeUnit()->clockPeriod());') + self.cg.cg_newline() + if is_smem: + self.cg.cg_if('instData.IMM') + self.cg.cg_code('offset = extData.OFFSET;') + self.cg.cg_else() + self.cg.cg_code('offset = readScalarReg(gpuDynInst,' + 'extData.OFFSET);') + self.cg.cg_end('if') + self.cg.cg_newline() + + self.cg.cg_block(self.load) + + if is_flat_mem and is_store: + has_vgpr_addr = False + for load_entry in self.load: + if 'vgpr_addr' in load_entry: + has_vgpr_addr = True + if has_vgpr_addr: + self.cg.cg_newline() + self.cg.cg_code('typedef 
decltype(vgpr_src)::RegType ' + 'RegType;') + self.cg.cg_newline() + self.cg.cg_code('assert(!(sizeof(RegType) % ' + 'sizeof(uint32_t))') + self.cg.inc_indent() + self.cg.cg_code(' || sizeof(RegType) < sizeof(uint32_t));') + self.cg.dec_indent() + self.cg.cg_newline() + self.cg.cg_code('int num_words = sizeof(RegType) / ' + 'sizeof(uint32_t);') + self.cg.cg_newline() + self.cg.cg_for('int lane = 0', 'lane < ' + 'wf->computeUnit->wfSize()', + '++lane') + self.cg.cg_if('gpuDynInst->wavefront()->execMask()[lane]') + self.cg.cg_for('int i = 0', 'i < num_words', '++i') + self.cg.cg_code('((uint32_t*)gpuDynInst->d_data)') + self.cg.inc_indent() + self.cg.cg_code('[i * wf->computeUnit->wfSize() + lane] =') + self.cg.inc_indent() + self.cg.cg_code('vgpr_src.getDword(i, lane);') + self.cg.dec_indent() + self.cg.dec_indent() + self.cg.cg_end('for') + self.cg.cg_end('if') + self.cg.cg_end('for') + self.cg.cg_newline() + self.cg.cg_code('calculateAddr(gpuDynInst, ' + 'vgpr_addr);') + if is_ds_mem and is_store: + has_vgpr_addr = False + for load_entry in self.load: + if 'vgpr_a' in load_entry: + has_vgpr_addr = True + if has_vgpr_addr: + self.cg.cg_newline() + self.cg.cg_code('typedef decltype(vgpr_d0)::RegType ' + 'RegType;') + self.cg.cg_newline() + self.cg.cg_code('assert(!(sizeof(RegType) % ' + 'sizeof(uint32_t))') + self.cg.inc_indent() + self.cg.cg_code(' || sizeof(RegType) < sizeof(uint32_t));') + self.cg.dec_indent() + self.cg.cg_newline() + self.cg.cg_code('int num_words = sizeof(RegType) / ' + 'sizeof(uint32_t);') + self.cg.cg_newline() + self.cg.cg_for('int lane = 0', 'lane < ' + 'wf->computeUnit->wfSize()', + '++lane') + self.cg.cg_if('gpuDynInst->wavefront()->execMask()[lane]') + self.cg.cg_for('int i = 0', 'i < num_words', '++i') + self.cg.cg_code('((uint32_t*)gpuDynInst->d_data)') + self.cg.inc_indent() + self.cg.cg_code('[i * wf->computeUnit->wfSize() + lane] =') + self.cg.inc_indent() + self.cg.cg_code('vgpr_d0.getDword(i, lane);') + self.cg.dec_indent() + 
self.cg.dec_indent() + self.cg.cg_end('for') + self.cg.cg_end('if') + self.cg.cg_end('for') + self.cg.cg_newline() + self.cg.cg_code('calculateAddr(gpuDynInst, ' + 'vgpr_a, 0, 0);') + + self.cg.cg_newline() + + # generate execute(), i.e., issue to appropriate memory pipe + if is_smem: + self.cg.cg_code('gpuDynInst->computeUnit()->scalarMemoryPipe.') + self.cg.inc_indent() + self.cg.cg_code('getGMReqFIFO().push(gpuDynInst);') + self.cg.dec_indent() + self.cg.cg_newline() + if is_load: + self.cg.cg_code('wf->scalarRdGmReqsInPipe--;') + self.cg.cg_code('wf->scalarOutstandingReqsRdGm++;') + else: + self.cg.cg_code('wf->scalarWrGmReqsInPipe--;') + self.cg.cg_code('wf->scalarOutstandingReqsWrGm++;') + elif is_flat_mem: + self.cg.cg_if('gpuDynInst->executedAs() == Enums::SC_GLOBAL') + self.cg.cg_code('gpuDynInst->computeUnit()->globalMemoryPipe.') + self.cg.inc_indent() + self.cg.cg_code('getGMReqFIFO().push(gpuDynInst);') + self.cg.dec_indent() + if is_load: + self.cg.cg_code('wf->rdGmReqsInPipe--;') + self.cg.cg_code('wf->outstandingReqsRdGm++;') + else: + self.cg.cg_code('wf->wrGmReqsInPipe--;') + self.cg.cg_code('wf->outstandingReqsWrGm++;') + self.cg.cg_else() + self.cg.cg_code('assert(false);') + self.cg.cg_end('else') + self.cg.cg_newline() + elif is_ds_mem: + self.cg.cg_code('gpuDynInst->computeUnit()->localMemoryPipe.') + self.cg.inc_indent() + self.cg.cg_code('getLMReqFIFO().push(gpuDynInst);') + self.cg.dec_indent() + self.cg.cg_newline() + if is_load: + self.cg.cg_code('wf->rdLmReqsInPipe--;') + self.cg.cg_code('wf->outstandingReqsRdLm++;') + else: + self.cg.cg_code('wf->wrLmReqsInPipe--;') + self.cg.cg_code('wf->outstandingReqsWrLm++;') + else: + self.cg.cg_if('isLocalMem()') + self.cg.cg_code('gpuDynInst->computeUnit()->localMemoryPipe.') + self.cg.inc_indent() + self.cg.cg_code('getLMReqFIFO().push(gpuDynInst);') + self.cg.dec_indent() + self.cg.cg_else() + self.cg.cg_code('gpuDynInst->computeUnit()->globalMemoryPipe.') + self.cg.inc_indent() + 
self.cg.cg_code('getGMReqFIFO().push(gpuDynInst);') + self.cg.dec_indent() + self.cg.cg_end('if') + self.cg.cg_newline() + self.cg.cg_code('gpuDynInst->wavefront()->outstandingReqs++;') + self.cg.cg_code('gpuDynInst->wavefront()'\ + '->validateRequestCounters();') + self.cg.cg_end('execute') + self.cg.cg_newline() + + # generate initiateAcc() + self.cg.cg_method('void', self.op_inst, 'initiateAcc', + ['GPUDynInstPtr gpuDynInst'], []) + + #if self.mem_load and 'SMEM' in self.op_inst: + if self.mem_load: + self.cg.cg_block(self.mem_load); + + if is_store: + if self.vector: + self.cg.cg_for('int t = 0', 'exec != 0', 't++, exec >>= 1') + self.cg.cg_if('(exec & 1) != 0') + self.cg.cg_block(self.vector) + self.cg.cg_end('if') # cg_if + self.cg.cg_end('for') # cg_for + + self.cg.cg_block(self.scalar) + self.cg.cg_block(self.store) + + self.cg.cg_end('initiateAcc') + self.cg.cg_newline() + + # generate completeAcc() + self.cg.cg_method('void', self.op_inst, 'completeAcc', + ['GPUDynInstPtr gpuDynInst'], []) + + if is_load: + if self.vector: + self.cg.cg_for('int t = 0', 'exec != 0', 't++, exec >>= 1') + self.cg.cg_if('(exec & 1) != 0') + self.cg.cg_block(self.vector) + self.cg.cg_end('if') # cg_if + self.cg.cg_end('for') # cg_for + + self.cg.cg_block(self.scalar) + self.cg.cg_block(self.store) + + else: + self.cg.cg_block(self.load) + + if self.mem_load: + self.cg.cg_block(self.mem_load); + + if self.vector: + self.cg.cg_for('int t = 0', 'exec != 0', 't++, exec >>= 1') + self.cg.cg_if('(exec & 1) != 0') + self.cg.cg_block(self.vector) + self.cg.cg_end('if') # cg_if + self.cg.cg_end('for') # cg_for + + self.cg.cg_block(self.scalar) + self.cg.cg_block(self.store) + + if self.store: + if is_load or is_store: + self.cg.cg_end('completeAcc') + else: + self.cg.cg_end('execute') + return True + else: + return False + +# a list where list[n] is always list[n] and +# the unreferenced elements have a default fill value +class IndexedList(object): + def __init__(self, size, fill): + 
self.list = [] + self.size = size + self.fill = fill + self.next_index = 0 + for i in range(0, size): + self.list.append(fill) + def grow(self, index, fill): + old_size = self.size + for i in range(old_size, index): + self.list.append(self.fill) + self.list.append(fill) + self.size = index + 1 + def __len__(self): + return self.size + def __iter__(self): + self.next_index = 0 + return self + def next(self): + while True: + if self.next_index >= self.size: + self.next_index = 0 + raise StopIteration + entry = self.list[self.next_index] + self.next_index += 1 + if entry != None: + return entry + def __getitem__(self, index): + if index >= self.size: + self.grow(index, self.fill) + return self.list[index] + def __setitem__(self, index, value): + if index >= self.size: + self.grow(index, value) + else: + self.list[index] = value + def __repr__(self): + text = '[\n' + for i in range(0, self.size): + text += '\t' + str(i) + ' :\t' + repr(self.list[i]) + '\n' + text += ']' + return text + +# Abstract Syntax Tree Interpreter +class RefinedOpInfo(object): + def __init__(self, name, inst, encode, op_typ): + self.name = name + self.inst = inst.name + self.enc = encode.name + self.sub_enc = op_typ.sub_enc + self.desc = op_typ.desc + self.flags = op_typ.flags + self.num_dst = 0 + self.num_src = 0 + self.dst = [] + self.src = [] + self.decl = [] + + def __repr__(self): + text = '"name":\t' + repr(self.name) + ',\n' + text += '"enc":\t' + repr(self.enc) + ',\n' + text += '"sub enc":\t' + repr(self.sub_enc) + ',\n' + text += '"desc":\t' + repr(self.desc) + ',\n' + text += '"flags":\t' + repr(self.flags) + ',\n' + text += '"num_dst":\t' + repr(self.num_dst) + ',\n' + text += '"num_src":\t' + repr(self.num_src) + ',\n' + text += '"dst":\t' + repr(self.dst) + ',\n' + text += '"src":\t' + repr(self.src) + '\n' + return text + + def override(self, operand): + assert type(operand) is Operand + if operand.num_dst > 0: + self.num_dst = operand.num_dst + if operand.num_src > 0: + 
self.num_src = operand.num_src + for od in operand.dst: + match = False + for sd in self.dst: + if sd.match(od): + sd.override(od) + match = True + if not match: + self.dst.append(copy.copy(od)) + for os in operand.src: + match = False + for ss in self.src: + if ss.match(os): + ss.override(os) + match = True + if not match: + self.src.append(copy.copy(os)) + +class AstInterpreter(object): + def __init__(self): + self.high_bits = 9 + self.max_bits = 6 + self.prefix = '' + self.constant = {} + self.type_by_value = {} + self.decode_info = {} + self.enc_by_name = {} + self.decode_tables = {} + self.inst_by_name = {} + self.inst_formats = {} + self.inst_fields = {} + self.inst_by_optype = {} + self.op_types_seen = [] + self.invalid_type = TypeClause() + self.invalid_type.name = 'invalid' + self.invalid_type.desc = ['invalid'] + self.default_type = TypeClause() + self.default_type.name = 'default' + self.default_type.desc = ['default'] + main_decode_table = IndexedList(512, 'subDecode_invalid') + self.decode_tables['tableDecodePrimary'] = main_decode_table + self.inst_with_encodings = [] + self.refined_op_info = [] + self.desc_parser = DescriptionParser() + self.look_for_comma_before_equal = False + self.ref_op_info_fixup = ['DS'] + self.methods = [] + + def bits_info(self, pattern, bits): + care_bits = 0 + match = 0 + for b in range(len(pattern) - 1, -1, -1): + if pattern[b] == '0': + care_bits += 1 + elif pattern[b] == '1': + match += 1 << care_bits + care_bits += 1 + shift = bits - care_bits + base = match << shift + copy = 1 << shift + return (base, copy) + + def handle_const(self, inst): + for clause in inst.clauses: + self.constant[clause.name] = clause.value + + def get_type_value_desc(self, type_value, type_list): + for t in type_list: + if t.name == type_value: + return t.desc + names = [] + for k in self.type_by_value.keys(): + tbv = self.type_by_value[k] + for t in range(0, len(tbv)): + if tbv[t] and tbv[t].name == type_value: + return tbv[t].desc + import 
pdb; pdb.set_trace() + return ['get_type_value_desc(%s)' % (type_value)] + + def handle_type(self, inst): + type_list = IndexedList(0, None) + regexp = re.compile('get_type_value_desc\(([^\)]+)\)') + for clause in inst.clauses: + assert type(clause) is TypeClause + if clause.v_max != 0: + # handle 'ID = NUMBER:NUMBER' clauses + for v in range(clause.value, clause.v_max + 1): + newc = copy.deepcopy(clause) + newc.name = clause.name + str(v) + newc.v_max = 0 + newc.value = v + type_list[v] = newc + elif clause.var == False: + if clause.name != '': + # handle 'ID = NUMBER' clauses + newc = copy.deepcopy(clause) + if newc.desc: + found = regexp.match(newc.desc[0]) + if found: + tval = found.group(1) + tlst = type_list + newc.desc = self.get_type_value_desc(tval, tlst) + type_list[newc.value] = newc + elif clause.type != '': + # handle 'type ID' clauses + for t in self.type_by_value[clause.type]: + type_list[t.value] = t + else: + # handle $(ID} substitution + orig_name = clause.name + a = orig_name.find('${') + b = orig_name.find('}') + var = orig_name[a:b+1] + vartype = self.type_by_value[var[2:-1]] + assert type(vartype) is IndexedList + for t in vartype: + assert type(t) is TypeClause + newc = copy.deepcopy(clause) + newc.name = orig_name.replace(var, t.name) + newc.desc = [] + for d in clause.desc: + edited = d.replace(var, t.desc[0]) + if self.look_for_comma_before_equal: + m = re.match('([^,]+(,[^,=]+)+)=', edited) + if m: + # import pdb; pdb.set_trace() + orig = m.group(1).strip() + repl = '{%s}' % orig + edited = edited.replace(orig, repl) + newc.desc.append(edited) + newc.value = clause.value + t.value + type_list[newc.value] = newc + self.type_by_value[inst.name] = type_list + + # decode an encoding with an OP field + def decode_enc_op(self, inst, enc_field, op_field): + assert type(inst) is InstBlock + assert type(enc_field) is InstField + assert type(op_field) is InstField + op_type = self.type_by_value[op_field.type] + assert type(op_type) is IndexedList + 
encode = self.enc_by_name[enc_field.enc] + assert type(encode) is EncodingBlock + if op_field.type not in self.inst_by_optype.keys(): + self.inst_by_optype[op_field.type] = inst.name + (base, copy) = self.bits_info(encode.bits, self.high_bits) + low_bit = 32 - self.high_bits + if op_field.value >= low_bit: + shift = op_field.value - low_bit + c = 1 << shift + for t in op_type: + assert type(t) is TypeClause + if 'OPF_INTERNAL' in t.flags: + continue + b = base + (t.value << shift) + n = 'decode_' + op_field.type + '__' + t.name + self.decode_info[n] = [ op_field.type, t ] + for i in range(b, b + c): + self.decode_tables['tableDecodePrimary'][i] = n + elif op_field.v_max >= low_bit: + blk_bits = low_bit - op_field.value + c = 1 << blk_bits + n = 'decode_' + op_field.type + '__invalid' + self.decode_info[n] = [ op_field.type, self.invalid_type ] + decode_table = IndexedList(c, n) + block = 0 + for i in range(base, base + copy): + valid = 0 + for t in op_type: + assert type(t) is TypeClause + if 'OPF_INTERNAL' in t.flags: + continue + if op_field.type == 'OPU_VOP3': + if 'OPF_NOVOP3' in t.flags: + continue + if (t.value >> blk_bits) == block: + valid += 1 + n = 'decode_' + op_field.type + '__' + t.name + self.decode_info[n] = [ op_field.type, t ] + decode_table[t.value] = n + if valid > 0: + # make sure table grows to a multiple of + # block size by referencing last entry + n = decode_table[((block + 1) << blk_bits) - 1] + # + n = 'tableSubDecode_' + op_field.type + self.decode_tables[n] = decode_table + n = 'subDecode_' + op_field.type + self.decode_tables['tableDecodePrimary'][i] = n + else: + n = 'decode_' + op_field.type + '__invalid' + self.decode_info[n] = [ op_field.type, self.invalid_type ] + self.decode_tables['tableDecodePrimary'][i] = n + block += 1 + else: + op_bits = 1 + op_field.v_max - op_field.value + for i in range(base, base + copy): + c = 1 << op_bits + n = 'decode_' + op_field.type + '__invalid' + self.decode_info[n] = [ op_field.type, 
def find_relevant_operand(self, operands, op_typ):
    """Return the operand description that applies to ``op_typ``.

    Operands are tried in order: parent_enc+sub_enc, parent_enc, sub_enc
    (descending into nested operands for sub_enc 'NEVER'), then first flag.
    The last operand carrying none of these selectors is the fallback;
    ``None`` is returned when nothing applies.
    """
    default_operand = None
    for operand in operands:
        if operand.parent_enc and operand.sub_enc:
            if (operand.parent_enc == op_typ.parent_enc
                    and operand.sub_enc == op_typ.sub_enc):
                return operand
            continue
        if operand.parent_enc:
            if operand.parent_enc == op_typ.parent_enc:
                return operand
            continue
        if operand.sub_enc:
            if operand.sub_enc == op_typ.sub_enc:
                return operand
            if operand.sub_enc == 'NEVER' and operand.operands:
                # NEVER indicates a deeper set of operands
                # which share this operand's attributes
                sub = self.find_relevant_operand(operand.operands, op_typ)
                if sub:
                    merge = copy.deepcopy(operand)
                    merge.override(sub)
                    return merge
            continue
        if operand.flags:
            # only the first flag of the operand is consulted
            if operand.flags[0] in op_typ.flags:
                return operand
            continue
        default_operand = operand
    return default_operand


# return True if when clause matches op type
def when_match(self, when, op_typ):
    """Return True when ``when`` is a 'flags' clause fully met by ``op_typ``."""
    return when.left == 'flags' and all(f in op_typ.flags
                                        for f in when.right)


def refine_op_info(self, ref_op_info, encode, op_typ):
    """Specialise ``ref_op_info`` for one opcode of ``encode``.

    Applies the relevant operand, its matching ``when`` clauses and nested
    operand as overrides, fills in missing operand sizes, and applies the
    name-suffix format fixup for encodings in ``self.ref_op_info_fixup``.
    """
    assert type(ref_op_info) is RefinedOpInfo
    assert type(encode) is EncodingBlock
    assert type(op_typ) is TypeClause
    relevant = self.find_relevant_operand(encode.operands, op_typ)
    if not relevant:
        return
    ref_op_info.override(relevant)
    if op_typ.flags:
        for when in relevant.when:
            if self.when_match(when, op_typ):
                ref_op_info.override(when.operand)
    sub_operands = relevant.operands
    if sub_operands:
        sub_operand = self.find_relevant_operand(sub_operands, op_typ)
        if sub_operand:
            ref_op_info.override(sub_operand)
    if op_typ.size >= 0 or encode.size >= 0:
        # the opcode's own size wins over the encoding's size
        default_size = op_typ.size if op_typ.size >= 0 else encode.size
        for d in ref_op_info.dst:
            if not d.fmt and d.size < 0:
                d.size = default_size
        for s in ref_op_info.src:
            if not s.fmt and s.size < 0:
                s.size = default_size
    if encode.name in self.ref_op_info_fixup:
        suffix = op_typ.name[-3:]
        if suffix not in SuffixToFmt:
            return
        fmt = SuffixToFmt[suffix]
        for d in ref_op_info.dst:
            d.fmt = fmt
        for s in ref_op_info.src:
            if s.index != 0:
                s.fmt = fmt
# generate instructions for an encoding with an OP field
def gen_inst(self, inst_tag, inst, encode, op_typ):
    """Record a refined op-info entry for one opcode, unless it is filtered.

    Opcodes flagged OPF_INTERNAL, and OPF_NOVOP3 opcodes of the VOP3
    encoding, are skipped.
    """
    assert type(inst_tag) is str
    assert type(inst) is InstBlock
    assert type(encode) is EncodingBlock
    assert type(op_typ) is TypeClause
    if 'OPF_INTERNAL' in op_typ.flags:
        return
    if encode.name == 'VOP3' and 'OPF_NOVOP3' in op_typ.flags:
        return
    refined = RefinedOpInfo(inst_tag, inst, encode, op_typ)
    self.refine_op_info(refined, encode, op_typ)
    self.refined_op_info.append(refined)


# generate instructions for an encoding with an OP field
def gen_inst_enc_op(self, inst, enc_field, op_field):
    """Call ``gen_inst`` once per opcode of the OP field's type."""
    opcodes = self.type_by_value[op_field.type]
    encoding = self.enc_by_name[enc_field.enc]
    for opcode in opcodes:
        tag = enc_field.enc + '__' + opcode.name
        self.gen_inst(tag, inst, encoding, opcode)


# an encoding with an OP field
def handle_enc_op(self, inst, enc_field, op_field):
    """Process an encoding with an OP field.

    Parent-encoding expansion runs only the first time an OP type is seen;
    decoding and instruction generation run every time.
    """
    first_sighting = op_field.type not in self.op_types_seen
    if first_sighting:
        self.op_types_seen.append(op_field.type)
        self.handle_parent_enc(inst, enc_field, op_field)
    self.decode_enc_op(inst, enc_field, op_field)
    self.gen_inst_enc_op(inst, enc_field, op_field)

# decode an encoding with no OP field
def decode_enc_no_op(self, inst, enc_field):
    """Point the encoding's primary-table slots at its single decoder."""
    encode = self.enc_by_name[enc_field.enc]
    # named 'count' so the imported copy module is not shadowed
    (base, count) = self.bits_info(encode.bits, self.high_bits)
    n = 'decode_OP_' + enc_field.enc
    self.decode_info[n] = ['OP_' + enc_field.enc, self.default_type]
    for i in range(base, base + count):
        self.decode_tables['tableDecodePrimary'][i] = n


# generate instructions for an encoding with no OP field
def gen_inst_enc_no_op(self, inst, enc_field):
    """Generate instructions through a synthetic 'virtual' OP field."""
    op_field = InstField()
    op_field.tag = 'virtual'
    op_field.name = 'OP'
    op_field.desc = 'default OP field for encodings with no OP field'
    op_field.type = 'OP_' + enc_field.enc
    self.gen_inst_enc_op(inst, enc_field, op_field)


# an encoding with no OP field
def handle_enc_no_op(self, inst, enc_field):
    """Decode, then generate instructions for, an encoding without OP."""
    self.decode_enc_no_op(inst, enc_field)
    self.gen_inst_enc_no_op(inst, enc_field)


def handle_parent_enc_search_replace(self, operands, op_type):
    """Splice parent-encoding opcodes into ``op_type`` at their offset.

    For each operand naming a parent encoding, the ``<X>_<rest>_OFFSET``
    entry of ``op_type`` gives the offset at which copies of the parent's
    opcodes (value shifted, tagged ``OPF_PEN_<enc>``) are stored. Operands
    with sub_enc 'NEVER' are searched recursively.
    """
    for opr in operands:
        if opr.sub_enc == 'NEVER':
            self.handle_parent_enc_search_replace(opr.operands, op_type)
        elif opr.parent_enc != '':
            # add '_' after first character for OFFSET/COUNT prefix
            tag = opr.parent_enc[0] + '_' + opr.parent_enc[1:]
            match = tag + '_OFFSET'
            offset = 0
            found = False
            # no early exit: the last matching entry decides the offset
            for t in op_type:
                if t.name == match:
                    offset = t.value
                    found = True
            if not found:
                continue
            par_enc_type = self.type_by_value['OP_' + opr.parent_enc]
            for t in par_enc_type:
                newt = copy.deepcopy(t)
                newt.value += offset
                newt.parent_enc = opr.parent_enc
                newt.flags.append('OPF_PEN_%s' % opr.parent_enc)
                op_type[newt.value] = newt
            # check to make sure entry at offset was replaced
            if op_type[offset].name == match:
                op_type[offset] = None


# handle an encoding with a parent_enc entry
def handle_parent_enc(self, inst, enc_field, op_field):
    """Run the parent-encoding expansion for one encoding/OP type pair."""
    encode = self.enc_by_name[enc_field.enc]
    op_type = self.type_by_value[op_field.type]
    self.handle_parent_enc_search_replace(encode.operands, op_type)
# build a table to lookup in which word an inst.field is found
def build_inst_fields(self, inst):
    """Record, per field name, whether it lives in instData or extData.

    Instructions whose description mentions 'Second'/'second' map to
    'extData'. Tables are keyed by the name up to the first underscore,
    except that a 'VOP' prefix uses the full instruction name.
    """
    assert type(inst) is InstBlock
    which_word = 'extData' if re.search('[Ss]econd', inst.desc) else 'instData'

    key = re.match('([^_]+)', inst.name).group(1)
    if key == 'VOP':
        key = inst.name

    table = self.inst_fields.setdefault(key, {})
    for field in inst.fields:
        table[field.name] = which_word


def handle_inst(self, inst):
    """Register an ``inst`` statement and dispatch on its OP/ENCODING fields."""
    assert type(inst) is InstBlock
    self.inst_by_name[inst.name] = inst
    self.build_inst_fields(inst)
    op_field = None
    enc_field = None
    # field_list will be in a form that cg_struct can use
    field_list = []
    for field in inst.fields:
        field_list.append([field.name, field.value, field.v_max])
        if field.name == 'OP':
            op_field = field
        elif field.name == 'ENCODING' and field.enc != '':
            enc_field = field
    self.inst_formats[inst.name] = field_list
    if enc_field is None:
        # an OP field is only meaningful together with an encoding
        assert op_field is None, 'inst %s enc:None op: %s' % (
            inst.name, op_field.type)
        return
    if inst.name not in self.inst_with_encodings:
        self.inst_with_encodings.append(inst.name)
    if op_field is not None:
        self.handle_enc_op(inst, enc_field, op_field)
    else:
        self.handle_enc_no_op(inst, enc_field)


def handle_encoding(self, statement):
    """Register an encoding and claim its still-unassigned primary slots.

    Only slots that still hold 'subDecode_invalid' are replaced by the
    encoding's own '__invalid' decoder.
    """
    self.enc_by_name[statement.name] = statement
    # named 'count' so the imported copy module is not shadowed
    (base, count) = self.bits_info(statement.bits, self.high_bits)
    n = 'decode_OP_' + statement.name + '__invalid'
    self.decode_info[n] = ['OP_' + statement.name, self.invalid_type]
    primary_decode_table = self.decode_tables['tableDecodePrimary']
    for i in range(base, base + count):
        if primary_decode_table[i] == 'subDecode_invalid':
            primary_decode_table[i] = n


# check for vector v scalar instruction encodings
def vector_or_scalar(self):
    """Tag every refined op-info with OPF_VECTOR or OPF_SCALAR.

    An op is vector when its encoding description starts with text
    containing 'Vector ALU', or when its instruction word (or the matching
    '_1' second word) has a field of type 'VGPR'.
    """
    enc_vec = {}
    for enc in self.enc_by_name.values():
        enc_vec[enc.name] = 'Vector ALU' in enc.desc[0]

    inst_vec = {}
    for inst in self.inst_by_name.values():
        inst_vec[inst.name] = any(f.type == 'VGPR' for f in inst.fields)

    # check the second word if a second word exists
    for inst in self.inst_by_name.values():
        if '_1' in inst.name or inst_vec[inst.name]:
            continue
        if '_' in inst.name:
            other_word = re.sub('_.*', '_1', inst.name)
        else:
            other_word = '%s_1' % inst.name
        if inst_vec.get(other_word):
            inst_vec[inst.name] = True

    for info in self.refined_op_info:
        if enc_vec[info.enc] or inst_vec[info.inst]:
            flag = 'OPF_VECTOR'
        else:
            flag = 'OPF_SCALAR'
        if flag not in info.flags:
            info.flags.append(flag)
def post_process_statements(self):
    """Run the passes that need every statement to have been handled."""
    self.vector_or_scalar()


def process_statements(self, statements):
    """Dispatch each statement on its keyword, then post-process.

    Statements whose keyword is not const/type/encoding/inst are ignored.
    """
    handler_names = {
        'const': 'handle_const',
        'type': 'handle_type',
        'encoding': 'handle_encoding',
        'inst': 'handle_inst',
    }
    for stmt in statements:
        handler = handler_names.get(stmt.keyword)
        if handler is not None:
            getattr(self, handler)(stmt)
    self.post_process_statements()


def is_special_lit(self, inst):
    """Return True if any typed field of ``inst`` can be 'SRC_LITERAL'."""
    for fld in inst.fields:
        if fld.type == '':
            continue
        values = self.type_by_value[fld.type]
        assert type(values) is IndexedList
        # index-based walk: empty slots are skipped by the truth test
        for idx in range(0, len(values)):
            if values[idx] and values[idx].name == 'SRC_LITERAL':
                return True
    return False


def is_var_size_enc(self, pri, sec):
    """Return True if the primary or (if present) second word takes a literal."""
    if self.is_special_lit(self.inst_by_name[pri]):
        return True
    has_second = sec in self.inst_by_name.keys()
    if has_second and self.is_special_lit(self.inst_by_name[sec]):
        return True
    return False
# gpu_decoder.hh
def generate_decoder_hh(self, output_dir):
    """Write gpu_decoder.hh into ``output_dir``.

    Emits the Decoder class declaration (decode tables and one decode
    method per table entry), one InFmt_* struct per instruction format and
    the InstFormat union.
    """
    path = os.path.join(output_dir, 'gpu_decoder.hh')
    cg = CodeGen(path)

    cg.cg_code('#ifndef __GPU_INTERNAL_ARCH_VI_DECODER_HH__')
    cg.cg_code('#define __GPU_INTERNAL_ARCH_VI_DECODER_HH__')
    cg.cg_newline()

    cg.cg_include('')
    cg.cg_include('')
    cg.cg_newline()

    cg.cg_include('gpu-internal/arch/vi/gpu_types.hh')
    cg.cg_newline()
    cg.cg_code('class GPUStaticInst;')
    cg.cg_newline()

    cg.cg_namespace('ViISA')

    cg.cg_code('class Decoder;')
    cg.cg_code('union InstFormat;')
    cg.cg_newline()
    cg.cg_code('using IsaDecodeMethod = GPUStaticInst*'
               '(Decoder::*)(MachInst);')
    cg.cg_newline()
    cg.cg_class('Decoder', [])
    cg.cg_scope('public:')
    cg.cg_code('Decoder();')
    cg.cg_code('~Decoder();')
    cg.cg_newline()

    cg.cg_code('GPUStaticInst* decode(MachInst);')
    cg.cg_newline()

    cg.cg_method('GPUStaticInst*', '', 'decode', ['RawMachInst inst'], [])
    cg.cg_code('return inst < decodedInsts.size() ? decodedInsts'
               '.at(inst) : nullptr;')
    cg.cg_end(None)
    cg.cg_newline()
    cg.cg_newline()

    cg.cg_method('RawMachInst', '',
                 'saveInst', ['GPUStaticInst *decodedInst'], [])
    cg.cg_code('decodedInsts.push_back(decodedInst);')
    cg.cg_code('return decodedInsts.size() - 1;')
    cg.cg_end(None)
    cg.cg_newline()
    cg.cg_newline()

    cg.cg_scope('private:')
    cg.cg_code('static std::vector decodedInsts;')
    cg.cg_newline()

    # a set de-duplicates in O(1) per entry; emission order comes from the
    # sorted() below, so insertion order does not matter
    methods = set()
    for key in sorted(self.decode_tables):
        table = self.decode_tables[key]
        for m in table:
            methods.add(m)
        cg.cg_code('static IsaDecodeMethod %s[%d];' % (key, len(table)))

    cg.cg_newline()

    for m in sorted(methods):
        cg.cg_code('GPUStaticInst* %s(MachInst);' % m)
    cg.cg_code('GPUStaticInst* decode_invalid(MachInst);')
    cg.cg_end('class Decoder')  # cg_class

    format_names = sorted(self.inst_formats)
    for key in format_names:
        cg.cg_newline()
        cg.cg_struct('InFmt_' + key, self.inst_formats[key])

    cg.cg_newline()
    cg.cg_union('InstFormat')
    # widest format name, used to align the union member names
    width = max([len(key) for key in format_names] + [0])

    for key in format_names:
        pad = ' ' * (width - len(key))
        cg.cg_code('InFmt_%s %siFmt_%s;' % (key, pad, key))
    for dtyp, dfld in (('unsigned int', 'imm_u32'), ('float', 'imm_f32')):
        pad = ' ' * (width - len(dtyp))
        cg.cg_code('%s %s%s;' % (dtyp, pad, dfld))
    cg.cg_end('union InstFormat')  # cg_union

    cg.cg_end('namespace ViISA')  # cg_namespace
    cg.cg_newline()

    cg.cg_code('#endif // __GPU_INTERNAL_ARCH_VI_DECODER_HH__')
    cg.generate()
# decoder.cc base
def generate_decoder_cc(self, output_dir):
    """Write decoder.cc into ``output_dir``.

    Emits the decode tables, the primary ``decode`` method, one method per
    'subDecode_*' and 'decode_*' table entry, and ``decode_invalid``.
    """
    path = os.path.join(output_dir, 'decoder.cc')
    cg = CodeGen(path)

    cg.cg_include('')
    cg.cg_newline()

    cg.cg_include('gpu-internal/arch/vi/gpu_decoder.hh')
    cg.cg_include('gpu-internal/arch/vi/gpu_static_inst.hh')
    cg.cg_include('gpu-internal/arch/vi/instructions.hh')
    cg.cg_newline()

    cg.cg_namespace('ViISA')

    cg.cg_method(None, 'Decoder', 'Decoder', [], [])
    cg.cg_end('Decoder')  # cg_method
    cg.cg_newline()

    cg.cg_method(None, 'Decoder', '~Decoder', [], [])
    cg.cg_end('~Decoder')  # cg_method
    cg.cg_newline()

    table_names = sorted(self.decode_tables)
    for key in table_names:
        cg.cg_table('IsaDecodeMethod', 'Decoder', key,
                    self.decode_tables[key])

    cg.cg_method('GPUStaticInst*', 'Decoder',
                 'decode', ['MachInst iFmt'], [])
    cg.cg_code('InFmt_SOP1 *enc = &iFmt->iFmt_SOP1;')
    cg.cg_code('IsaDecodeMethod method = ' +
               'tableDecodePrimary[enc->ENCODING];')
    cg.cg_code('return (this->*method)(iFmt);')
    cg.cg_end('decode')  # cg_method

    # first-seen order is the emission order; the set only speeds up the
    # duplicate check
    decoders = []
    sub_decoders = []
    seen = set()
    for key in table_names:
        for m in self.decode_tables[key]:
            if m[0:7] == 'decode_':
                bucket = decoders
            elif m[0:10] == 'subDecode_':
                bucket = sub_decoders
            else:
                continue
            if m not in seen:
                seen.add(m)
                bucket.append(m)

    for m in sub_decoders:
        cg.cg_newline()
        cg.cg_method('GPUStaticInst*', 'Decoder',
                     m, ['MachInst iFmt'], [])
        if m == 'subDecode_invalid':
            cg.cg_code('return decode_invalid(iFmt);')
        else:
            o = m[10:]
            e = self.inst_by_optype[o]
            t = 'tableSubDecode_' + o
            cg.cg_code('InFmt_' + e + ' *enc = &iFmt->iFmt_' + e + ';')
            cg.cg_code('IsaDecodeMethod method = ' + t + '[enc->OP];')
            cg.cg_code('return (this->*method)(iFmt);')
        cg.cg_end(m)  # cg_method

    for m in decoders:
        cg.cg_newline()
        cg.cg_method('GPUStaticInst*', 'Decoder',
                     m, ['MachInst iFmt'], [])
        [op_type, t] = self.decode_info[m]
        op_enc = re.sub('OP[U]?_', '', op_type)
        op_fmt = 'iFmt_%s' % op_enc
        if op_fmt == 'iFmt_VOP3':
            # VOP3 ops flagged OPF_VCCD or OPF_PEN_VOPC use the SDST format
            if 'OPF_VCCD' in t.flags or 'OPF_PEN_VOPC' in t.flags:
                op_fmt = 'iFmt_VOP3_SDST_ENC'
        op_inst = 'Inst_%s__%s' % (op_enc, t.name)
        cg.cg_code('return new %s(&iFmt->%s);' % (op_inst, op_fmt))
        cg.cg_end(m)  # cg_method

    cg.cg_newline()
    cg.cg_method('GPUStaticInst*', 'Decoder',
                 'decode_invalid', ['MachInst iFmt'], [])
    cg.cg_code('return new Inst_invalid(iFmt);')
    cg.cg_end('decode_invalid')  # cg_method

    cg.cg_code('std::vector Decoder::decodedInsts;')
    cg.cg_newline()

    cg.cg_end('namespace ViISA')  # cg_namespace

    cg.generate()
def _hh_src_operands(self, info):
    """Yield the source operands of ``info`` that receive an operand index."""
    for src_op in info.src:
        if (src_op.name in ('carryin', 'vcc')
                and 'OPF_VCCS' not in info.flags):
            continue
        if src_op.name == 'vgpr_d0' and 'OPF_DS1D' not in info.flags:
            continue
        if src_op.name == 'vgpr_d1' and 'OPF_DS2D' not in info.flags:
            continue
        yield src_op


def _hh_dst_operands(self, info, keep_vcc=False):
    """Yield the destination operands of ``info`` that receive an index.

    ``keep_vcc`` keeps carryout/vcc even without OPF_VCCD.
    """
    for dst_op in info.dst:
        if (dst_op.name in ('carryout', 'vcc')
                and 'OPF_VCCD' not in info.flags and not keep_vcc):
            continue
        if dst_op.name == 'vgpr_rtn' and 'OPF_DSRTN' not in info.flags:
            continue
        yield dst_op


def _hh_operand_switch(self, cg, info, ret, name, src_val, rdvcc_val,
                       dst_val, default, keep_vcc=False):
    """Emit one ``switch (opIdx)`` method: sources, implicit VCC, then dsts."""
    op_idx = 0
    cg.cg_newline()
    cg.cg_method(ret, None, name, ['int opIdx'], None, 'override')
    cg.cg_code('switch (opIdx) {')
    for src_op in self._hh_src_operands(info):
        cg.cg_code(' case %i: //%s' % (op_idx, src_op.name))
        cg.cg_code(' return %s;' % src_val(src_op))
        op_idx += 1
    if 'OPF_RDVCC' in info.flags:
        cg.cg_code(' case %i:' % op_idx)
        cg.cg_code(' return %s;' % rdvcc_val)
        op_idx += 1
    for dst_op in self._hh_dst_operands(info, keep_vcc):
        cg.cg_code(' case %i: //%s' % (op_idx, dst_op.name))
        cg.cg_code(' return %s;' % dst_val(dst_op))
        op_idx += 1
    cg.cg_code(' default:')
    cg.cg_code(r' fatal("op idx %i out of bounds\n", opIdx);')
    cg.cg_code(' return %s;' % default)
    cg.cg_code('}')
    cg.cg_end(name)


# instructions.hh
def generate_instructions_hh(self, output_dir):
    """Write instructions.hh into ``output_dir``.

    Emits one class per refined op-info, one invalid/default class per
    encoding in ``self.inst_with_encodings``, and the ``Inst_invalid`` class.
    """
    path = os.path.join(output_dir, 'instructions.hh')
    cg = CodeGen(path)

    cg.cg_code('#ifndef __GPU_INTERNAL_ARCH_VI_INSTRUCTIONS_HH__')
    cg.cg_code('#define __GPU_INTERNAL_ARCH_VI_INSTRUCTIONS_HH__')
    cg.cg_newline()

    cg.cg_include('gpu-internal/arch/vi/gpu_decoder.hh')
    cg.cg_include('gpu-internal/arch/vi/gpu_static_inst.hh')
    cg.cg_include('gpu-internal/arch/vi/op_encodings.hh')
    cg.cg_newline()

    cg.cg_namespace('ViISA')

    ds_access_ops = ('DS_WRITE_B32', 'DS_WRITE_B64',
                     'DS_READ_B32', 'DS_READ_B64')

    for info in self.refined_op_info:
        op_op = re.sub('.*__', '', info.name)
        op_enc = re.sub('__.*', '', info.name)
        op_inst = 'Inst_%s__%s' % (op_enc, op_op)
        op_base = 'Inst_%s' % op_enc
        op_fmt = 'InFmt_%s' % op_enc
        if op_enc == 'VOP3':
            if 'OPF_VCCD' in info.flags or 'OPF_PEN_VOPC' in info.flags:
                op_base = 'Inst_VOP3_SDST_ENC'
                op_fmt = 'InFmt_VOP3_SDST_ENC'

        cg.cg_class(op_inst, [op_base])
        cg.cg_scope('public:')
        cg.cg_code('%s(%s*);' % (op_inst, op_fmt))

        cg.cg_code('~%s();' % op_inst)
        cg.cg_newline()

        n_dst = 0
        n_src = 0
        if info.sub_enc != 'SEN_NODST' and info.sub_enc != 'SEN_G_FORK':
            n_dst = info.num_dst
        if info.sub_enc != 'SEN_NOSRC':
            n_src = info.num_src
        if 'OPF_RDVCC' in info.flags:
            n_src += 1

        cg.cg_method('int', None, 'getNumOperands', None, None,
                     'override')
        cg.cg_code('return numDstRegOperands() + numSrcRegOperands();')
        cg.cg_end('getNumOperands')

        cg.cg_newline()
        cg.cg_code('int numDstRegOperands() override { return %i; }'
                   % n_dst)
        cg.cg_code('int numSrcRegOperands() override { return %i; }'
                   % n_src)

        def src_size(op):
            # true division and '%i' kept as in the original computation
            if not op.fmt:
                return '%i' % ((op.size * 32) / 8)
            if op.fmt == 'RSRC_TYPED' or op.fmt == 'SAMP':
                return '4'
            return '%i' % (self.fmt_to_details(op.fmt)[1] / 8)

        def dst_size(op, op_inst=op_inst):
            if (op_inst == 'Inst_SMEM__S_LOAD_DWORD'
                    or op_inst == 'Inst_FLAT__FLAT_LOAD_DWORD'):
                return '4'
            if not op.fmt:
                return '%i' % ((op.size * 32) / 8)
            return '%i' % (self.fmt_to_details(op.fmt)[1] / 8)

        # int getOperandSize(int opIdx);
        # NOTE(review): only this method keeps carryout/vcc for VOPC; the
        # two predicates below do not, exactly as in the original code
        self._hh_operand_switch(cg, info, 'int', 'getOperandSize',
                                src_size, '8', dst_size, '-1',
                                keep_vcc=(op_enc == 'VOPC'))

        # bool isSrcOperand(int opIdx);
        self._hh_operand_switch(cg, info, 'bool', 'isSrcOperand',
                                lambda op: 'true', 'true',
                                lambda op: 'false', 'false')

        # bool isDstOperand(int opIdx);
        self._hh_operand_switch(cg, info, 'bool', 'isDstOperand',
                                lambda op: 'false', 'false',
                                lambda op: 'true', 'false')

        # void execute(GPUDynInstPtr gpuDynInst);
        cg.cg_newline()
        cg.cg_code('void execute(GPUDynInstPtr) override;')
        if ('OPF_MEM_STORE' in info.flags or 'LOAD' in op_op
                or op_op in ds_access_ops):
            cg.cg_code('void initiateAcc(GPUDynInstPtr) override;')
            cg.cg_code('void completeAcc(GPUDynInstPtr) override;')

        if op_inst in HandCodedDecl:
            cg.cg_newline()
            cg.cg_scope('private:')
            cg.cg_block(HandCodedDecl[op_inst])
        elif info.decl:
            cg.cg_newline()
            cg.cg_scope('private:')
            cg.cg_block(info.decl)

        cg.cg_end(op_inst)  # cg_class
        cg.cg_newline()

    # NOTE(review): the original tested `info.flags` inside the loop below,
    # i.e. the variable left over from the loop above (its last element),
    # and raised NameError when refined_op_info was empty. The same outcome
    # is computed explicitly here; confirm whether initiateAcc is really
    # meant to depend on the last instruction.
    last_is_store = (bool(self.refined_op_info) and
                     'OPF_MEM_STORE' in self.refined_op_info[-1].flags)

    for op_enc in self.inst_with_encodings:
        op_op = 'default' if op_enc == 'EXP' else 'invalid'
        op_inst = 'Inst_%s__%s' % (op_enc, op_op)
        op_fmt = 'InFmt_%s' % op_enc
        cg.cg_newline()
        cg.cg_class(op_inst, ['Inst_%s' % op_enc])
        cg.cg_scope('public:')
        cg.cg_code('%s(%s*);' % (op_inst, op_fmt))
        cg.cg_code('~%s();' % op_inst)
        cg.cg_newline()

        cg.cg_code('bool isValid() const override;')
        cg.cg_code('int getNumOperands() override { return -1; }')
        cg.cg_code('int numDstRegOperands() override { return -1; }')
        cg.cg_code('int numSrcRegOperands() override { return -1; }')
        cg.cg_code('void execute(GPUDynInstPtr) override;')
        if last_is_store:
            cg.cg_code('void initiateAcc(GPUDynInstPtr) override;')
        cg.cg_end(op_inst)  # cg_class

    cg.cg_newline()
    op_inst = 'Inst_invalid'
    op_fmt = 'InstFormat'
    cg.cg_class(op_inst, ['ViGPUStaticInst'])
    cg.cg_scope('public:')
    cg.cg_code('%s(%s*);' % (op_inst, op_fmt))
    cg.cg_code('~%s();' % op_inst)
    cg.cg_newline()
    cg.cg_code('int getNumOperands() override { return -1; }')
    cg.cg_code('int numDstRegOperands() override { return -1; }')
    cg.cg_code('int numSrcRegOperands() override { return -1; }')
    cg.cg_code('void execute(GPUDynInstPtr gpuDynInst) { }')
    cg.cg_code('bool isValid() const override;')
    cg.cg_code('uint32_t instSize();')
    cg.cg_end(op_inst)  # cg_class

    cg.cg_end('namespace ViISA')  # cg_namespace

    cg.cg_code('#endif // __GPU_INTERNAL_ARCH_VI_INSTRUCTIONS_HH__')
    cg.generate()
def find_opr_info(self, oi_list, opr, idx):
    """Return the OpInfo in ``oi_list`` with this ``opr`` kind and index.

    Returns None, after a warning on stderr, when there is no such entry.
    """
    assert type(oi_list) is list
    assert type(opr) is str
    assert type(idx) is int
    for oi in oi_list:
        assert type(oi) is OpInfo
        if oi.opr == opr and oi.index == idx:
            return oi
    # was a pdb breakpoint, which stalls or aborts non-interactive runs
    sys.stderr.write('warning: no operand info for %s:%d\n' % (opr, idx))
    return None


def fmt_to_details(self, fmt):
    """Look up ``fmt`` in FmtToDetails; ('unknown', -1) if absent."""
    if fmt in FmtToDetails:
        return FmtToDetails[fmt]
    if fmt != '':
        # an empty format is expected; anything else is worth reporting
        sys.stderr.write('warning: unknown operand format %r\n' % (fmt,))
    return ('unknown', -1)


def type_to_details(self, typ):
    """Look up ``typ`` in TypeToDetails; ('unknown', -1) if absent."""
    if typ in TypeToDetails:
        return TypeToDetails[typ]
    # caller can handle 'unknown'
    return ('unknown', -1)


def size_to_details(self, size):
    """Map a positive size in 32-bit words to ('SregU<bits>', bits).

    Non-positive sizes give ('unknown', -1) after a warning on stderr.
    """
    if size > 0:
        bits = 32 * size
        return ('SregU%d' % bits, bits)
    sys.stderr.write('warning: no register type for size %r\n' % (size,))
    return ('unknown', -1)
def reg_info_to_field(self, reg, info, op_info):
    """Return the instruction field name used to access register ``reg``.

    The encoding-specific table is consulted before the generic one; ''
    is returned, after a warning on stderr, when neither has an entry.
    """
    assert type(reg) is str
    assert type(op_info) is OpInfo
    key = '%s:%s:%s:%s' % (info.enc, reg, op_info.iseq, op_info.name)
    if key in EncRegInfoToField:
        return EncRegInfoToField[key]
    key = '%s:%s:%s' % (reg, op_info.iseq, op_info.name)
    if key in RegInfoToField:
        return RegInfoToField[key]
    # was a pdb breakpoint, which stalls or aborts non-interactive runs
    sys.stderr.write('warning: no field mapping for %s:%s\n'
                     % (info.enc, key))
    return ''


def typ_to_dtyp_dsiz(self, typ, op_info):
    """Return (type name, size) for an operand.

    The declared type ``typ[0]`` wins; otherwise the operand's format, then
    its size, is used. A negative size falls back to ``op_info.size``.
    """
    assert type(typ) is list
    assert type(op_info) is OpInfo
    # instruction desc info gets priority
    details = self.type_to_details(typ[0])
    if details[0] == 'unknown':
        details = self.fmt_to_details(op_info.fmt)
        if details[0] == 'unknown':
            details = self.size_to_details(op_info.size)
    dtyp = details[0]
    dsiz = details[1]
    if dsiz < 0:
        dsiz = op_info.size
    return (dtyp, dsiz)


def spec_reg_to_frag(self, reg, info, key):
    """Build a CodeFrag for special register ``reg`` from SpecRegToDetails.

    When the fragment ends up with neither context nor field, its type is
    taken from destination operand 0.
    """
    frag = CodeFrag(key)
    frag.setup(SpecRegToDetails[reg])
    if frag.ctx == '' and frag.fld == '':
        op_info = self.find_opr_info(info.dst, 'dst', 0)
        frag.typ = self.typ_to_dtyp_dsiz(['unknown', -1], op_info)[0]
    return frag


def reg_access_fragment(self, reg, info, op_info, vop, key):
    """Build the CodeFrag describing one register access."""
    assert type(reg) is DataRegClause
    assert type(info) is RefinedOpInfo
    assert type(op_info) is OpInfo
    assert type(vop) is str
    assert type(key) is str
    dtyp = self.typ_to_dtyp_dsiz(reg.typ, op_info)[0]
    field = self.reg_info_to_field(reg.reg, info, op_info)
    enc_fields = self.inst_fields[info.enc]
    if field in enc_fields:
        # qualify the field with the word it lives in
        word = '%s.%s' % (enc_fields[field], field)
    else:
        word = field
    if reg.reg in DataRegisters:
        if reg.reg[-2:] == '_1':
            word += ' + 1'
        elif reg.reg[-2:] == '_2':
            word += ' + 2'
    if field and field[0] == '-':
        vop = ''
    codeFrag = CodeFrag(key)
    codeFrag.ctx = op_info.iseq  # context accessor
    codeFrag.fld = word          # field reference
    codeFrag.var = op_info.name  # variable name
    codeFrag.typ = dtyp          # variable type
    codeFrag.exp = ''            # general expression
    codeFrag.vec = vop           # vector index
    return codeFrag
def dst_fragment(self, reg, info):
    """Return the CodeFrag for a destination register reference.

    Returns None, after a warning on stderr, for an unrecognised register.
    """
    assert type(reg) is DataRegClause
    assert type(info) is RefinedOpInfo
    if reg.reg == 'D' or reg.reg in DataRegisters:
        op_info = self.find_opr_info(info.dst, 'dst', 0)
    elif reg.reg in SpecRegToDetails:
        return self.spec_reg_to_frag(reg.reg, info, 'dst')
    else:
        # was a pdb breakpoint, which stalls or aborts non-interactive runs
        sys.stderr.write('warning: unknown dst register %r\n' % (reg.reg,))
        return None
    assert type(op_info) is OpInfo
    vop = ''
    if 'OPF_VECTOR' in info.flags:
        if op_info.iseq != 'SREG':  # VOP3_VOPC uses a scalar dst
            vop = '[t]'
    return self.reg_access_fragment(reg, info, op_info, vop, 'dst')


def get_addr_op_info(self, info):
    """Return the OpInfo holding the address: src 1 for SMEM/MUBUF stores,
    src 0 otherwise."""
    assert type(info) is RefinedOpInfo
    if info.enc in ['SMEM', 'MUBUF'] and 'OPF_MEM_STORE' in info.flags:
        return self.find_opr_info(info.src, 'src', 1)
    return self.find_opr_info(info.src, 'src', 0)


def get_data_op_info(self, info):
    """Return the OpInfo holding the data operand of a memory op."""
    assert type(info) is RefinedOpInfo
    if info.enc in ['SMEM', 'MUBUF'] and 'OPF_MEM_STORE' in info.flags:
        return self.find_opr_info(info.src, 'src', 0)
    # NOTE(review): nesting reconstructed from whitespace-collapsed source;
    # the atomic test is assumed to apply to every encoding - confirm
    if 'OPF_MEM_ATOMIC' in info.flags:
        return self.find_opr_info(info.dst, 'dst', 0)
    return self.find_opr_info(info.src, 'src', 1)


def src_fragment(self, reg, info):
    """Return the CodeFrag for a source register reference.

    Returns None, after a warning on stderr, for an unrecognised register.
    """
    assert type(reg) is DataRegClause
    assert type(info) is RefinedOpInfo
    # register names that map straight to a fixed source operand index
    fixed_src = {'S0': 0, 'S': 0, 'S1': 1, 'S2': 2,
                 'ADDR_BASE': 0, 'DATA2': 2}
    if reg.reg in fixed_src:
        op_info = self.find_opr_info(info.src, 'src', fixed_src[reg.reg])
    elif reg.reg == 'D':
        op_info = self.find_opr_info(info.dst, 'dst', 0)
    elif reg.reg == 'ADDR':
        op_info = self.get_addr_op_info(info)
    elif reg.reg in DataRegisters:
        op_info = self.get_data_op_info(info)
    elif reg.reg in SpecRegToDetails:
        return self.spec_reg_to_frag(reg.reg, info, 'src')
    else:
        # was a pdb breakpoint, which stalls or aborts non-interactive runs
        sys.stderr.write('warning: unknown src register %r\n' % (reg.reg,))
        return None
    assert type(op_info) is OpInfo
    vop = ''
    if 'OPF_VECTOR' in info.flags:
        if op_info.iseq != 'SREG':  # VOP3_VOPC uses a scalar dst
            vop = '[t]'
    return self.reg_access_fragment(reg, info, op_info, vop, 'src')
def unary_expr(self, clause, info):
    """Render ``<op><operand>``; returns (fragments, expression text)."""
    assert type(clause) is UnaryClause
    assert type(info) is RefinedOpInfo
    frags, inner = self.expression(clause.oprnd, info)
    return (frags, '%s%s' % (clause.op, inner))


def binary_expr(self, clause, info):
    """Render ``<left> <op> <right>``; left fragments precede right ones."""
    assert type(clause) is BinaryClause
    assert type(info) is RefinedOpInfo
    frags, left_txt = self.expression(clause.left, info)
    right_frags, right_txt = self.expression(clause.right, info)
    frags.extend(right_frags)
    return (frags, '%s %s %s' % (left_txt, clause.op, right_txt))


def function_expr(self, clause, info):
    """Render ``func(args)`` where ``args`` is a single expression or empty."""
    assert type(clause) is FunctionClause
    assert type(info) is RefinedOpInfo
    frags, args_txt = [], ''
    if clause.args:
        frags, args_txt = self.expression(clause.args, info)
    return (frags, '%s(%s)' % (clause.func, args_txt))


def cond_expr(self, clause, info):
    """Render ``cond ? true : false``, evaluating the parts in that order."""
    assert isinstance(clause, Clause)
    assert type(info) is RefinedOpInfo
    frags, cond_txt = self.expression(clause.cond, info)
    then_frags, then_txt = self.expression(clause.true, info)
    else_frags, else_txt = self.expression(clause.false, info)
    frags.extend(then_frags)
    frags.extend(else_frags)
    return (frags, '%s ? %s : %s' % (cond_txt, then_txt, else_txt))
def const_expr(self, clause, info):
    """Render a constant: floats as %f, ints below 1000 in decimal, else hex."""
    assert type(clause) is ConstantClause
    assert type(info) is RefinedOpInfo
    if type(clause.value) is float:
        exp = '%f' % clause.value
    elif clause.value < 1000:
        exp = '%d' % clause.value
    else:
        exp = '0x%x' % clause.value
    return ([], exp)


def cast_expr(self, clause, info):
    """Render ``(type)expr``."""
    assert type(clause) is CastClause
    assert type(info) is RefinedOpInfo
    (src_set, var_exp) = self.expression(clause.var, info)
    return (src_set, '(%s)%s' % (clause.typ, var_exp))


def func_expr(self, clause, info):
    """Render ``func(a, b, ...)`` where ``args`` is a list of expressions."""
    assert type(clause) is FunctionClause
    assert type(info) is RefinedOpInfo
    if clause.args == []:
        return ([], '%s()' % clause.func)
    (src_set, arg_exp) = self.expression(clause.args[0], info)
    rendered = [arg_exp]
    for a in clause.args[1:]:
        (arg_ss, arg_exp) = self.expression(a, info)
        rendered.append(arg_exp)
        src_set.extend(arg_ss)
    return (src_set, '%s(%s)' % (clause.func, ', '.join(rendered)))


def comma_expr(self, clause, info):
    """Render ``left, right``."""
    assert type(clause) is CommaClause
    assert type(info) is RefinedOpInfo
    (src_set, lex) = self.expression(clause.left, info)
    (right_ss, rex) = self.expression(clause.right, info)
    src_set.extend(right_ss)
    return (src_set, '%s, %s' % (lex, rex))


def src_gpr_expr(self, clause, info):
    """Render a GPR read whose index is a register or register +/- expr.

    Returns ([], ''), after a warning on stderr, for any other index kind.
    """
    assert type(clause) is GprClause
    assert type(info) is RefinedOpInfo
    # use the S0 op_info as a reference; the result is unused but the
    # lookups are kept for the checks they perform
    op_info = self.find_opr_info(info.src, 'src', 0)
    self.typ_to_dtyp_dsiz(clause.typ, op_info)
    if type(clause.idx) is BinaryClause:
        (src_set, rex) = self.expression(clause.idx.right, info)
        (left_ss, lex) = self.src_operand(clause.idx.left, info)
        # NOTE(review): src_operand appends its own fragment last, yet the
        # offset is applied to the first fragment here - confirm
        left_ss[0].fld += ' %s %s' % (clause.idx.op, rex)
        src_set.extend(left_ss)
        return (src_set, lex)
    if type(clause.idx) is DataRegClause:
        return self.src_operand(clause.idx, info)
    # was a pdb breakpoint, which stalls or aborts non-interactive runs
    sys.stderr.write('warning: unsupported src GPR index %r\n'
                     % (clause.idx,))
    return ([], '')
def dst_gpr_expr(self, clause, info):
    """Render a GPR write whose index is a register or register +/- expr.

    Returns ([], ''), after a warning on stderr, for any other index kind.
    """
    assert type(clause) is GprClause
    assert type(info) is RefinedOpInfo
    # use the D0 op_info as a reference; the result is unused but the
    # lookups are kept for the checks they perform
    op_info = self.find_opr_info(info.dst, 'dst', 0)
    self.typ_to_dtyp_dsiz(clause.typ, op_info)
    if type(clause.idx) is BinaryClause:
        (src_set, rex) = self.expression(clause.idx.right, info)
        (left_ss, lex) = self.dst_operand(clause.idx.left, info)
        left_ss[0].fld += ' %s %s' % (clause.idx.op, rex)
        src_set.extend(left_ss)
        return (src_set, lex)
    if type(clause.idx) is DataRegClause:
        return self.dst_operand(clause.idx, info)
    # was a pdb breakpoint followed by an unreachable return
    sys.stderr.write('warning: unsupported dst GPR index %r\n'
                     % (clause.idx,))
    return ([], '')


def range_expr(self, range, info, var):
    """Wrap ``var`` in a range selector: ``var(lo, hi)``, ``var(x)`` or bare.

    ``range`` may be a two-element list, a single expression, or falsy.
    """
    assert type(info) is RefinedOpInfo
    if not range:
        return ([], var)
    if type(range) is list:
        (src_set, lex) = self.expression(range[0], info)
        (rex_ss, rex) = self.expression(range[1], info)
        src_set.extend(rex_ss)
        return (src_set, '%s(%s, %s)' % (var, lex, rex))
    (src_set, lex) = self.expression(range, info)
    return (src_set, '%s(%s)' % (var, lex))


def src_mem_expr(self, clause, info):
    """Render a memory read and append its 'mem' fragment to the set.

    Raises ValueError when the address expression yields no fragments
    (previously an unbound-variable error).
    """
    assert type(clause) is MemClause
    assert type(info) is RefinedOpInfo
    if self.prefix:
        dtyp = self.prefix
    else:
        if info.enc in ('SMEM', 'FLAT', 'MUBUF'):
            if info.name[-5:] == 'DWORD':
                # fixup hack - dst:0 size is wrong
                op_info = OpInfo()
                op_info.size = 1
            else:
                # use the D0 op_info as a reference
                op_info = self.find_opr_info(info.dst, 'dst', 0)
        else:
            # use the S1 op_info as a reference
            op_info = self.find_opr_info(info.src, 'src', 1)
        dtyp = self.typ_to_dtyp_dsiz(['unknown', -1], op_info)[0]
    (src_set, addr_exp) = self.expression(clause.addr, info)
    # first fragment whose field mentions ADDR, else the last fragment
    addr_frag = None
    for addr_frag in src_set:
        if 'ADDR' in addr_frag.fld:
            break
    if addr_frag is None:
        raise ValueError('memory address expression produced no fragments')
    mem_frag = CodeFrag('src')
    mem_frag.ctx = clause.mem
    mem_frag.fld = addr_frag.var
    mem_frag.var = ''
    mem_frag.typ = dtyp
    mem_frag.exp = addr_exp
    mem_frag.vec = addr_frag.vec
    mem_frag.scn = 'mem'
    src_set.append(mem_frag)
    exp = 'vmem_$%s$%s' % (mem_frag.exp, mem_frag.vec)
    (rng_ss, exp) = self.range_expr(clause.rng, info, exp)
    src_set.extend(rng_ss)
    return (src_set, exp)
'dst', 0) + else: + # use the S1 op_info as a reference + op_info = self.find_opr_info(info.src, 'src', 1) + no_typ = ['unknown', -1] + (dtyp, dsiz) = self.typ_to_dtyp_dsiz(no_typ, op_info) + (src_set, addr_exp) = self.expression(clause.addr, info) + for addr_frag in src_set: + if 'ADDR' in addr_frag.fld: + break + mem_frag = CodeFrag('src') + mem_frag.ctx = clause.mem + mem_frag.fld = addr_frag.var + mem_frag.var = '' + mem_frag.typ = dtyp + mem_frag.exp = addr_exp + mem_frag.vec = addr_frag.vec + mem_frag.scn = 'mem' + src_set.append(mem_frag) + exp = 'vmem_$%s$%s' % (mem_frag.exp, mem_frag.vec) + (rng_ss, exp) = self.range_expr(clause.rng, info, exp) + src_set.extend(rng_ss) + return (src_set, exp) + + def dst_mem_expr(self, clause, info): + assert type(clause) is MemClause + assert type(info) is RefinedOpInfo + if self.prefix: + dtyp = self.prefix + else: + if info.enc == 'SMEM': + if info.name[-5:] == 'DWORD': + # fixup hack - src:0 size is wrong + op_info = OpInfo() + op_info.size = 1 + else: + # use the S0 op_info as a reference + op_info = self.find_opr_info(info.src, 'src', 0) + elif info.enc == 'FLAT': + if info.name[-5:] == 'DWORD': + # fixup hack - src:0 size is wrong + op_info = OpInfo() + op_info.size = 1 + else: + # use the S1 op_info as a reference + op_info = self.find_opr_info(info.src, 'src', 1) + elif info.enc == 'MUBUF': + if info.name[-5:] == 'DWORD': + # fixup hack - src:0 size is wrong + op_info = OpInfo() + op_info.size = 1 + else: + # use the S0 op_info as a reference + op_info = self.find_opr_info(info.src, 'src', 0) + elif info.dst: + # use the D0 op_info as a reference + op_info = self.find_opr_info(info.dst, 'dst', 0) + else: + import pdb; pdb.set_trace() + # use the S0 op_info as a reference + op_info = self.find_opr_info(info.src, 'src', 0) + no_typ = ['unknown', -1] + (dtyp, dsiz) = self.typ_to_dtyp_dsiz(no_typ, op_info) + (src_set, addr_exp) = self.expression(clause.addr, info) + for addr_frag in src_set: + if 'ADDR' in 
addr_frag.fld: + break + mem_frag = CodeFrag('dst') + mem_frag.ctx = clause.mem + mem_frag.fld = addr_frag.var + mem_frag.var = '' + mem_frag.typ = dtyp + mem_frag.exp = addr_exp + mem_frag.vec = addr_frag.vec + mem_frag.scn = 'mem' + src_set.append(mem_frag) + exp = 'vmem_$%s$%s' % (mem_frag.exp, mem_frag.vec) + return (src_set, exp) + + def group_expr(self, clause, info): + assert type(clause) is GroupClause + assert type(info) is RefinedOpInfo + (src_set, grp_exp) = self.expression(clause.group[0], info) + exp = 'Group(' + comma = '' + src_set = [] + for grp in clause.group: + (grp_ss, grp_exp) = self.expression(grp, info) + src_set.extend(grp_ss) + exp += comma + grp_exp + comma = ', ' + exp += ')' + return (src_set, exp) + + def paren_expr(self, clause, info): + assert type(clause) is ParenClause + assert type(info) is RefinedOpInfo + (src_set, par_exp) = self.expression(clause.parexp, info) + exp = '(%s)' % par_exp + return (src_set, exp) + + def dst_operand(self, clause, info): + assert type(clause) is DataRegClause + assert type(info) is RefinedOpInfo + frag = self.dst_fragment(clause, info) + if not type(frag) is CodeFrag: + import pdb; pdb.set_trace() + var = frag.var + frag.vec + + if clause.idx: + (idx_ss, idx_exp) = self.expression(clause.idx, info) + frag.exp = idx_exp + else: + idx_ss = [] + + (rng_ss, var) = self.range_expr(clause.rng, info, var) + + # dst frag comes first + src_set = [ frag ] + src_set.extend(idx_ss) + src_set.extend(rng_ss) + return (src_set, var) + + def src_operand(self, clause, info): + assert type(clause) is DataRegClause + assert type(info) is RefinedOpInfo + frag = self.src_fragment(clause, info) + var = frag.var + frag.vec + + if clause.idx: + (idx_ss, idx_exp) = self.expression(clause.idx, info) + frag.exp = idx_exp + else: + idx_ss = [] + + (rng_ss, var) = self.range_expr(clause.rng, info, var) + + # src frag comes last + src_set = idx_ss + src_set.extend(rng_ss) + src_set.append(frag) + return (src_set, var) + + def 
expression(self, clause, info): + if not isinstance(clause, Clause): + import pdb; pdb.set_trace() + assert isinstance(clause, Clause) + assert type(info) is RefinedOpInfo + if type(clause) is DataRegClause: + (src_set, exp) = self.src_operand(clause, info) + elif type(clause) is BinaryClause: + (src_set, exp) = self.binary_expr(clause, info) + elif type(clause) is ConditionalClause: + (src_set, exp) = self.cond_expr(clause, info) + elif type(clause) is UnaryClause: + (src_set, exp) = self.unary_expr(clause, info) + elif type(clause) is FunctionClause: + (src_set, exp) = self.function_expr(clause, info) + elif type(clause) is ConstantClause: + (src_set, exp) = self.const_expr(clause, info) + elif type(clause) is CastClause: + (src_set, exp) = self.cast_expr(clause, info) + elif type(clause) is FunctionClause: + (src_set, exp) = self.func_expr(clause, info) + elif type(clause) is CommaClause: + (src_set, exp) = self.comma_expr(clause, info) + elif type(clause) is GprClause: + (src_set, exp) = self.src_gpr_expr(clause, info) + elif type(clause) is MemClause: + (src_set, exp) = self.src_mem_expr(clause, info) + elif type(clause) is GroupClause: + (src_set, exp) = self.group_expr(clause, info) + elif type(clause) is ParenClause: + (src_set, exp) = self.paren_expr(clause, info) + else: + exp = 'error' + import pdb; pdb.set_trace() + return (src_set, exp) + + def assignment(self, clause, info, g): + assert type(clause) is AssignmentClause + assert type(info) is RefinedOpInfo + assert type(g) is GenOne + + dst_regs = [] + # process the left hand side + if type(clause.dst) is DataRegClause: + (dst_set, dst_var) = self.dst_operand(clause.dst, info) + g.add_dst_set(dst_set) + for d in dst_set: + if d.key == 'dst' or d.key == 'mem': + dst_regs.append(d.var) + elif type(clause.dst) is GprClause: + (dst_set, dst_var) = self.dst_gpr_expr(clause.dst, info) + g.add_dst_set(dst_set) + for d in dst_set: + if d.key == 'dst' or d.key == 'mem': + dst_regs.append(d.var) + elif 
type(clause.dst) is MemClause: + (dst_set, dst_var) = self.dst_mem_expr(clause.dst, info) + g.add_dst_set(dst_set) + for d in dst_set: + if d.key == 'dst' or d.key == 'mem': + dst_regs.append(d.var) + elif type(clause.dst) is GroupClause: + dst_var = 'Group(' + comma = '' + for grp in clause.dst.group: + if type(grp) is DataRegClause: + (dst_set, grp_var) = self.dst_operand(grp, info) + elif type(grp) is MemClause: + (dst_set, grp_var) = self.dst_mem_expr(grp, info) + else: + import pdb; pdb.set_trace() + g.add_dst_set(dst_set) + for d in dst_set: + if d.key == 'dst' or d.key == 'mem': + dst_regs.append(d.var) + dst_var += comma + grp_var + comma = ', ' + dst_var += ')' + else: + assert False, 'unexpected clause' + + # process the right hand side + (src_set, src_exp) = self.expression(clause.src, info) + g.add_src_set(src_set) + + # main assignment string + g.add_math('%s %s %s;' % (dst_var, clause.op, src_exp)) + + # mark dst regs as modified so we don't load them later + g.modified.extend(dst_regs) + + def ifthenelse(self, clause, info, g): + assert type(clause) is IfThenElseClause + assert type(info) is RefinedOpInfo + assert type(g) is GenOne + (cond_ss, cond_exp) = self.expression(clause.cond, info) + g.add_src_set(cond_ss) + g.add_math('if (%s) {' % cond_exp) + g.inc_indent() + for stmt in clause.then_stmt: + if type(stmt) is AssignmentClause: + self.assignment(stmt, info, g) + elif type(stmt) is CommentClause: + pass + else: + (then_ss, then_exp) = self.expression(stmt, info) + g.add_src_set(then_ss) + g.add_math('%s;' % then_exp) + if clause.else_stmt: + g.dec_indent() + g.add_math('} else {') + g.inc_indent() + for stmt in clause.else_stmt: + if type(stmt) is AssignmentClause: + self.assignment(stmt, info, g) + elif type(stmt) is CommentClause: + pass + else: + (else_ss, else_exp) = self.expression(stmt, info) + g.add_src_set(else_ss) + g.add_math('%s;' % else_exp) + g.dec_indent() + g.add_math('}') + + def if_clause(self, clause, info, g): + assert 
type(clause) is IfClause + assert type(info) is RefinedOpInfo + assert type(g) is GenOne + + def tab_clause(self, clause, info, g): + assert type(clause) is TabClause + assert type(info) is RefinedOpInfo + assert type(g) is GenOne + + def else_clause(self, clause, info, g): + assert type(clause) is ElseClause + assert type(info) is RefinedOpInfo + assert type(g) is GenOne + + def chain_clause(self, clause, info, g): + assert type(clause) is ChainClause + assert type(info) is RefinedOpInfo + assert type(g) is GenOne + if type(clause.right) is AssignmentClause: + self.assignment(clause.right, info, g) + right_dst = copy.deepcopy(clause.right.dst) + right_dst.rng = None + assign = AssignmentClause() + assign.dst = clause.left + assign.op = '=' + assign.src = right_dst + g.set_vector(False) + self.assignment(assign, info, g) + g.set_vector('OPF_VECTOR' in info.flags) + + def generate_execute_code(self, op_inst, info, ast, cg): + assert type(info) is RefinedOpInfo + assert type(ast) is list + assert type(cg) is CodeGen + # fix a bug in the sq_uc.arch source + if 'OPF_MOVRELS' in info.flags: + for si in info.src: + if si.iseq == 'SREG' and si.name == 'sdst': + si.name = 'ssrc' + is_vec = 'OPF_VECTOR' in info.flags + gen_one = GenOne(op_inst, cg, is_vec, self.methods, info) + self.prefix = '' + for clause in ast: + if type(clause) is AssignmentClause: + self.assignment(clause, info, gen_one) + elif type(clause) is IfThenElseClause: + self.ifthenelse(clause, info, gen_one) + elif type(clause) is IfClause: + self.if_clause(clause, info, gen_one) + elif type(clause) is TabClause: + self.tab_clause(clause, info, gen_one) + elif type(clause) is ElseClause: + self.else_clause(clause, info, gen_one) + elif type(clause) is ChainClause: + self.chain_clause(clause, info, gen_one) + elif type(clause) is SizeClause: + if 'OPF_MEM_ATOMIC' in info.flags: + #import pdb; pdb.set_trace() + self.prefix = 'SregU%d' % clause.size + info_copy = copy.deepcopy(info) + info_copy.dst[0].size = 
clause.size / 32 + info = info_copy + elif type(clause) is CommentClause: + pass + else: + import pdb; pdb.set_trace() + assert False, 'should not get here' + return gen_one.finish() + + def parse_and_generate(self, op_inst, info, cg): + assert type(op_inst) is str + assert type(info) is RefinedOpInfo + assert type(cg) is CodeGen + try: + ast = self.desc_parser.parse_description(info.desc) + # print '--------------------------\n%s' % (op_inst) + # pprint(info.desc) + # pprint(ast) + if not self.generate_execute_code(op_inst, info, ast, cg): + cg.cg_comment('Could not parse sq_uc.arch desc field') + cg.cg_code('//gpuDynInst->warnUnimplemented("TBD: %s");' + % op_inst) + return 'empty' + except ParseError: + # print '--------------------------\n%s' % (op_inst) + # pprint(info.desc) + cg.cg_comment('Could not parse sq_uc.arch desc field') + cg.cg_code('//gpuDynInst->warnUnimplemented("TBD: %s");' + % op_inst) + return 'except' + except: + raise + return 'success' + + def setOpTypeFlags(self, info, op_enc, op_op, cg): + # Op type flags: + # Nop - op_op contains NOP + # ALU + # Branch - op_op contains BRANCH or CBRANCH + # Conditional Branch - op_op contains CBRANCH + # Return - not implemented + # MemFence - no matching VI ISA operations + # MemBarrier - S_BARRIER + # Flat - op_enc = FLAT + # SpecialOp - not implemented + if 'CBRANCH' in op_op: + cg.cg_code('setFlag(CondBranch);') + if op_op == 'S_BARRIER': + cg.cg_code('setFlag(MemBarrier);') + elif op_op == 'S_WAITCNT': + cg.cg_code('setFlag(ALU);') + cg.cg_code('setFlag(Waitcnt);') + elif op_op == 'S_ENDPGM': + cg.cg_code('setFlag(ALU);') + elif 'NOP' in op_op: + cg.cg_code('setFlag(Nop);') + if op_op == 'V_NOP': + cg.cg_code('setFlag(ALU);') + elif 'BRANCH' in op_op: + cg.cg_code('setFlag(Branch);') + elif op_op in ['S_SETPC', 'S_SWAPPC', 'S_SETVSKIP']: + cg.cg_code('setFlag(UnconditionalJump);') + # set ALU flag for each encoding + elif (op_enc == 'SOP2' and not op_op in + ['S_CBRANCH_G_FORK', 
'S_RFE_RESTORE_B64']): + cg.cg_code('setFlag(ALU);') + elif (op_enc == 'SOPK' and not op_op in + ['S_GETREG_B32', 'S_SETREG_B32', 'S_SETREG_IMM32_B32']): + cg.cg_code('setFlag(ALU);') + elif (op_enc == 'SOP1' and not op_op in + ['S_GETPC_B64', 'S_SETPC_B64', 'S_SWAPPC_B64', 'S_RFE_B64', + 'S_SET_GPR_IDX_IDX']): + cg.cg_code('setFlag(ALU);') + elif (op_enc == 'SOPC' and not op_op in + ['S_SETVSKIP', 'S_SET_GPR_IDX_ON']): + cg.cg_code('setFlag(ALU);') + elif op_enc == 'VOP2': + cg.cg_code('setFlag(ALU);') + elif op_enc == 'VOP1' and not op_op in ['V_READFIRSTLANE_B32']: + cg.cg_code('setFlag(ALU);') + elif op_enc == 'VOPC': + cg.cg_code('setFlag(ALU);') + elif op_enc == 'VINTRP': + cg.cg_code('setFlag(ALU);') + elif (op_enc == 'VOP3' and not op_op in + ['V_CLREXCP', 'V_READLANE_B32', 'V_WRITELANE_B32']): + cg.cg_code('setFlag(ALU);') + elif (op_enc in ['SOPP', 'SMEM', 'DS', 'MUBUF', 'MTBUF', 'MIMG', 'EXP', + 'FLAT']): + # remaining opcodes in these encodings are + # considered ALU operations + pass + + def setMemoryAccessFlags(self, info, op_enc, op_op, cg): + # set memory access flags for non-atomic operations + if 'OPF_MEM_STORE' in info.flags or 'DS_WRITE' in op_op: + cg.cg_code('setFlag(MemoryRef);') + cg.cg_code('setFlag(Store);') + elif 'LOAD' in op_op or 'DS_READ' in op_op: + cg.cg_code('setFlag(MemoryRef);') + cg.cg_code('setFlag(Load);') + + def setSegmentAccessFlags(self, info, op_enc, op_op, cg): + if op_enc in ['MUBUF', 'MTBUF', 'MIMG']: + cg.cg_code('setFlag(GlobalSegment);') + + def setAtomicFlags(self, info, op_enc, op_op, cg): + ops = {'AND' : 'AtomicAnd', + 'OR' : 'AtomicOr', + 'XOR' : 'AtomicXor', + 'CMPSWAP' : 'AtomicCAS', + 'ADD' : 'AtomicAdd', + 'SUB' : 'AtomicSub', + 'INC' : 'AtomicInc', + 'DEC' : 'AtomicDec', + 'MAX' : 'AtomicMax', + 'SMAX' : 'AtomicMax', + 'UMAX' : 'AtomicMax', + 'MIN' : 'AtomicMin', + 'SMIN' : 'AtomicMin', + 'UMIN' : 'AtomicMin', + 'SWAP' : 'AtomicExch' + } + atomic_op = re.sub('.*ATOMIC_', '', op_op) + atomic_op = 
re.sub('_.*', '', atomic_op) + + if atomic_op in ops.keys(): + cg.cg_code('setFlag(%s);' % ops[atomic_op]) + # for atomics, the GLC bit determines if + # an atomic returns the pre-op value + cg.cg_if('instData.GLC') + cg.cg_code('setFlag(AtomicReturn);') + cg.cg_else() + cg.cg_code('setFlag(AtomicNoReturn);') + cg.cg_end('if') + cg.cg_code('setFlag(MemoryRef);') + + + def setModeFlags(self, info, op_op, cg): + if ('S_SETVSKIP' in op_op or 'S_SETREG' in op_op + or 'S_SET_GPR' in op_op): + cg.cg_code('setFlag(WritesMode);') + if 'S_GETREG' in op_op: + cg.cg_code('setFlag(ReadsMode);') + + def setEXECFlags(self, info, cg): + if 'OPF_RDEX' in info.flags: + cg.cg_code('setFlag(ReadsEXEC);') + if 'OPF_WREX' in info.flags: + cg.cg_code('setFlag(WritesEXEC);') + + def setVCCFlags(self, info, cg): + if 'OPF_VCCD' in info.flags: + cg.cg_code('setFlag(WritesVCC);') + if 'OPF_VCCS' in info.flags or 'OPF_RDVCC' in info.flags: + cg.cg_code('setFlag(ReadsVCC);') + + def setFlags(self, info, op_inst, op_enc, op_op, cg): + self.setOpTypeFlags(info, op_enc, op_op, cg) + self.setVCCFlags(info, cg) + self.setEXECFlags(info, cg) + self.setModeFlags(info, op_op, cg) + if 'OPF_MEM_ATOMIC' in info.flags: + self.setAtomicFlags(info, op_enc, op_op, cg) + else: + self.setMemoryAccessFlags(info, op_enc, op_op, cg) + self.setSegmentAccessFlags(info, op_enc, op_op, cg) + + # instructions.cc + def generate_instructions_cc(self, output_dir): + file = os.path.join(output_dir, 'instructions.cc') + cg = CodeGen(file) + + cg.cg_include('') + cg.cg_newline() + + cg.cg_include('gpu-compute/shader.hh') + cg.cg_include('gpu-internal/arch/vi/instructions.hh') + cg.cg_include('gpu-internal/arch/vi/inst_util.hh') + cg.cg_newline() + + cg.cg_namespace('ViISA') + + instruction_index = 0 + empty_count = 0 + exception_count = 0 + found_exceptions = [] + known_except = copy.deepcopy(KnownExceptions) + found_empty = [] + known_empty = copy.deepcopy(KnownEmpty) + + for info in self.refined_op_info: + op_op = 
re.sub('.*__', '', info.name) + op_enc = re.sub('__.*', '', info.name) + op_inst = 'Inst_%s__%s' % (op_enc, op_op) + op_base = 'Inst_%s' % op_enc + op_fmt = 'InFmt_%s' % op_enc + if op_fmt == 'InFmt_VOP3': + vccd = 'OPF_VCCD' in info.flags + vopc = 'OPF_PEN_VOPC' in info.flags + if vccd or vopc: + op_base = 'Inst_VOP3_SDST_ENC' + op_fmt = 'InFmt_VOP3_SDST_ENC' + arg = '%s *iFmt' % op_fmt + ini = '%s(iFmt, "%s")' % (op_base, op_op.lower()) + + cg.cg_comment('--- %s class methods ---' % op_inst) + cg.cg_newline() + cg.cg_method(None, op_inst, op_inst, [arg], [ini]) + self.setFlags(info, op_inst, op_enc, op_op, cg) # set Flags + cg.cg_end(op_inst) # cg_method + cg.cg_newline() + cg.cg_method(None, op_inst, '~%s' % op_inst, [], []) + cg.cg_end('~%s' % op_inst) # cg_method + cg.cg_newline() + + cg.cg_comment('--- description from .arch file ---') + edited = [] + for d in info.desc: + edited.extend(d.split('\\n')) + for e in edited: + if e: + line = e.replace('\\t', ' ') + lead = '' + abs_max = 80 - 3 # cg_comment prepends '// ' + len_max = abs_max - len(lead) + while len(line) > len_max: + brk = line.rfind(' ', 0, len_max) + cg.cg_comment(lead + line[0:brk]) + line = line[brk:] + lead = '--- ' + len_max = abs_max - len(lead) + cg.cg_comment(lead + line) + cg.cg_method('void', op_inst, 'execute', + ['GPUDynInstPtr gpuDynInst'], []) + # breakpoint + #if op_inst == 'Inst_MUBUF__BUFFER_ATOMIC_SWAP': + # import pdb; pdb.set_trace() + if op_inst in HandCodedExecMethods.keys(): + cg.cg_block(HandCodedExecMethods[op_inst]) + cg.cg_end('execute') # cg_method + else: + result = self.parse_and_generate(op_inst, info, cg) + if result == 'success': + pass + elif result == 'empty': + empty_count += 1 + if op_inst not in known_empty: + found_empty.append(op_inst) + print 'Parse Empty' + pprint(found_empty) + import pdb; pdb.set_trace() + else: + known_empty.remove(op_inst) + cg.cg_end('execute') # cg_method + elif result == 'except': + exception_count += 1 + if op_inst not in 
known_except: + found_exceptions.append(op_inst) + print 'Parse Error' + pprint(found_exceptions) + import pdb; pdb.set_trace() + else: + known_except.remove(op_inst) + cg.cg_end('execute') # cg_method + instruction_index += 1 + if known_empty: + print 'Problems in instructions %s are no longer seen.' % ( + repr(known_empty)) + if known_except: + print 'Exceptions in instructions %s are no longer seen.' % ( + repr(known_except)) + i = instruction_index + e = empty_count + print 'Found no pseudo code in %d of %d instructions' % (e, i) + e = exception_count + print 'Found exceptions in %d of %d instructions' % (e, i) + + for op_enc in self.inst_with_encodings: + if op_enc == 'EXP': + op_op = 'default' + else: + op_op = 'invalid' + op_inst = 'Inst_%s__%s' % (op_enc, op_op) + cg.cg_newline() + cg.cg_comment('--- %s class methods ---' % op_inst) + arg = 'InFmt_%s *iFmt' % op_enc + ini = 'Inst_%s(iFmt, "%s_%s")' % (op_enc, op_enc, op_op) + cg.cg_method(None, op_inst, op_inst, [arg], [ini]) + cg.cg_end(op_inst) # cg_method + cg.cg_method(None, op_inst, '~%s' % op_inst, [], []) + cg.cg_end('~%s' % op_inst) # cg_method + cg.cg_newline() + cg.cg_method('bool', op_inst, 'isValid', [], [], 'const') + cg.cg_code('return false;') + cg.cg_end('isValid') # cg_method + cg.cg_method('void', op_inst, 'execute', + ['GPUDynInstPtr gpuDynInst'], []) + cg.cg_code('//gpuDynInst->warnUnimplemented("%s");' % op_inst) + cg.cg_end('execute') # cg_method + + op_inst = 'Inst_invalid' + cg.cg_newline() + cg.cg_comment('--- %s class methods ---' % op_inst) + cg.cg_method(None, op_inst, op_inst, + ['MachInst'], + ['ViGPUStaticInst("Inst_invalid")']) + cg.cg_end(op_inst) # cg_method + cg.cg_method(None, op_inst, '~%s' % op_inst, [], []) + cg.cg_end('~%s' % op_inst) # cg_method + cg.cg_newline() + cg.cg_method('uint32_t', op_inst, 'instSize', [], []) + cg.cg_code('return 4;') + cg.cg_end('instSize') # cg_method + cg.cg_method('bool', op_inst, 'isValid', [], [], 'const') + cg.cg_code('return false;') 
+ cg.cg_end('isValid') # cg_method + + cg.cg_end('namespace ViISA') # cg_namespace + + cg.generate() + + def generate_code(self, output_dir): + # pprint(self.refined_op_info) + self.generate_decoder_hh(output_dir) + self.generate_decoder_cc(output_dir) + self.generate_instructions_cc(output_dir) + self.generate_instructions_hh(output_dir) diff --git a/src/arch/amdgpu/gcn3/ast_objects.py b/src/arch/amdgpu/gcn3/ast_objects.py new file mode 100644 index 0000000000..c658f31ff5 --- /dev/null +++ b/src/arch/amdgpu/gcn3/ast_objects.py @@ -0,0 +1,865 @@ +# Copyright (c) 2015-2021 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +class Statement(object): + def __init__(self): + self.keyword = '' + +class ImportStatement(Statement): + def __init__(self): + self.keyword = 'import' + self.what = '' + self.name = '' + +class FlagBlock(Statement): + def __init__(self): + self.keyword = 'flag' + self.name = '' + self.desc = [] + + def update(self, ref): + if ref.tag == 'name': + self.name = ref.name + elif ref.tag == 'desc': + self.desc = ref.desc + elif ref.tag == 'desc+': + self.desc = self.desc + ref.desc + else: + assert (False), 'error: unexpected FlagBlock.tag' + ref.tag + self.tag = 'updated' + +class FlagsField(object): + def __init__(self): + self.tag = '' + self.name = '' + self.private = 0 + self.desc = [] + self.group = '' + + def update(self, ref): + if ref.tag == 'name': + self.name = ref.name + elif ref.tag == 'private': + self.private = ref.private + elif ref.tag == 'desc': + self.desc = ref.desc + elif ref.tag == 'desc+': + self.desc = self.desc + ref.desc + elif ref.tag == 'group': + self.group = ref.group + else: + assert (False), 'error: unexpected FlagsField.tag ' + ref.tag + self.tag = 'updated' + +class FlagsBlock(Statement): + def __init__(self): + self.keyword = 'flags' + self.clauses = [] + +class OpInfo(object): + def __init__(self): + self.tag = '' + self.opr = '' + self.index = -1 + self.iseq = '' + self.name = '' + self.fmt = '' + self.size = -1 + self.inout = -1 + + def __repr__(self): + text = '\n [\n' + text += '\topr: ' + 
repr(self.opr) + ',\n' + text += '\tindex: ' + repr(self.index) + ',\n' + text += '\tiseq: ' + repr(self.iseq) + ',\n' + text += '\tname: ' + repr(self.name) + ',\n' + text += '\tfmt: ' + repr(self.fmt) + ',\n' + text += '\tsize: ' + repr(self.size) + ',\n' + text += '\tinout: ' + repr(self.inout) + '\n' + text += '\n ]\n' + return text + + def update(self, ref): + if ref.tag == 'dst': + self.opr = ref.opr + self.index = ref.index + self.iseq = ref.iseq + elif ref.tag == 'src': + self.opr = ref.opr + self.index = ref.index + self.iseq = ref.iseq + elif ref.tag == 'name': + self.name = ref.name + elif ref.tag == 'fmt': + self.fmt = ref.fmt + elif ref.tag == 'size': + self.size = ref.size + elif ref.tag == 'inout': + self.inout = ref.inout + else: + assert (False), 'error: unexpected OpInfo.tag ' + ref.tag + self.tag = 'updated' + + def match(self, ref): + if self.opr != ref.opr: + return False + if self.index != ref.index: + return False + return True + + def override(self, ref): + if ref.iseq: + self.iseq = ref.iseq + if ref.name: + self.name = ref.name + if ref.fmt: + self.fmt = ref.fmt + if ref.size >= 0: + self.size = ref.size + if ref.inout >= 0: + self.inout = ref.inout + +class Operand(object): + def __init__(self): + self.tag = '' + self.num_dst = -1 + self.num_src = -1 + self.parent_enc = '' + self.sub_enc = '' + self.flags = [] + self.dst = [] + self.src = [] + self.when = [] + self.operands = [] + + def __repr__(self): + text = '\tnum_dst: ' + repr(self.num_dst) + ',\n' + text += '\tnum_src: ' + repr(self.num_src) + ',\n' + text += '\tparent_enc: ' + repr(self.parent_enc) + ',\n' + text += '\tsub_enc: ' + repr(self.sub_enc) + ',\n' + text += '\tflags: ' + repr(self.flags) + ',\n' + text += '\tdst: ' + repr(self.dst) + ',\n' + text += '\tsrc: ' + repr(self.src) + ',\n' + text += '\twhen: ' + repr(self.when) + ',\n' + text += '\toperands: ' + repr(self.operands) + '\n' + return text + + def update(self, ref): + if ref.tag == 'num_dst': + self.num_dst = 
ref.num_dst + elif ref.tag == 'num_src': + self.num_src = ref.num_src + elif ref.tag == 'parent_enc': + self.parent_enc = ref.parent_enc + elif ref.tag == 'sub_enc': + self.sub_enc = ref.sub_enc + elif ref.tag == 'flags': + self.flags = self.flags + ref.flags + elif ref.tag == 'dst': + self.dst = self.dst + ref.dst + elif ref.tag == 'src': + self.src = self.src + ref.src + elif ref.tag == 'when': + self.when = self.when + ref.when + elif ref.tag == 'operands': + self.operands = self.operands + ref.operands + else: + assert (False), 'error: unexpected Operand.tag ' + ref.tag + self.tag = 'updated' + + def override(self, ref): + if ref.num_dst > 0: + self.num_dst = ref.num_dst + if ref.num_src > 0: + self.num_src = ref.num_src + for r in ref.dst: + for s in self.dst: + if s.match(r): + s.override(r) + for r in ref.src: + for s in self.src: + if s.match(r): + s.override(r) + +class WhenBlock(object): + def __init__(self): + self.left = '' + self.right = [] + self.operand = None + +class EncodingBlock(Statement): + def __init__(self): + self.keyword = 'encoding' + self.tag = '' + self.name = '' + self.bits = '' + self.size = -1 + self.desc = [] + self.operands = [] + + def update(self, ref): + if ref.tag == 'name': + self.name = ref.name + elif ref.tag == 'bits': + self.bits = ref.bits + elif ref.tag == 'size': + self.size = ref.size + elif ref.tag == 'desc': + self.desc = self.desc + ref.desc + elif ref.tag == 'operands': + self.operands = self.operands + ref.operands + else: + assert (False), 'error: unexpected EncodingBlock.tag ' + ref.tag + self.tag = 'updated' + +class ConstClause(object): + def __init__(self): + self.name = '' + self.value = 0 + +class ConstBlock(Statement): + def __init__(self): + self.keyword = 'const' + self.clauses = [] + +class TypeClause(object): + def __init__(self): + self.tag = '' + self.name = '' + self.v_max = 0 + self.value = 0 + self.dp_only = 0 + self.size = -1 + self.var = False + self.desc = [] + self.flags = [] + self.src_flags = 
'' + self.sp3_desc = [] + self.sp3_name = '' + self.sp3_ncomp = 0 + self.sp3_num = '' + self.parent_enc = '' + self.sub_enc = '' + self.op_type = '' + self.fmt = '' + self.type = '' + self.range = [] + self.size_bits = -1 + + def update(self, ref): + if ref.tag == 'id_range': + self.name = ref.name + self.v_max = ref.v_max + self.value = ref.value + elif ref.tag == 'id_number': + self.name = ref.name + self.value = ref.value + elif ref.tag == 'id_var_number': + self.name = ref.name + self.value = ref.value + self.var = ref.var + elif ref.tag == 'desc': + self.desc = ref.desc + self.desc + elif ref.tag == 'desc+': + self.desc = self.desc + ref.desc + elif ref.tag == 'flags': + self.flags = ref.flags + self.flags + elif ref.tag == 'flags+': + self.flags = self.flags + ref.flags + elif ref.tag == 'src_flags': + self.src_flags = ref.src_flags + elif ref.tag == 'sp3_desc': + self.sp3_desc = ref.sp3_desc + self.sp3_desc + elif ref.tag == 'sp3_desc+': + self.sp3_desc = self.sp3_desc + ref.sp3_desc + elif ref.tag == 'sp3_name': + self.sp3_name = ref.sp3_name + elif ref.tag == 'sp3_ncomp': + self.sp3_ncomp = ref.sp3_ncomp + elif ref.tag == 'sp3_num': + self.sp3_num = ref.sp3_num + elif ref.tag == 'parent_enc': + self.parent_enc = ref.parent_enc + elif ref.tag == 'sub_enc': + self.sub_enc = ref.sub_enc + elif ref.tag == 'op_type': + self.op_type = ref.op_type + elif ref.tag == 'dp_only': + self.dp_only = ref.dp_only + elif ref.tag == 'size': + self.size = ref.size + elif ref.tag == 'fmt': + self.fmt = ref.fmt + elif ref.tag == 'type': + self.type = ref.type + elif ref.tag == 'range': + self.range = ref.range + else: + assert (False), 'error: unexpected TypeClause.tag ' + ref.tag + self.tag = 'updated' + +class TypeBlock(Statement): + def __init__(self): + self.keyword = 'type' + self.name = '' + self.clauses = [] + +class InstField(object): + def __init__(self): + self.tag = '' + self.name = '' + self.v_max = 0 + self.value = 0 + self.desc = '' + self.type = '' + self.enc = 
'' + + def update(self, ref): + if ref.tag == 'id_range': + self.name = ref.name + self.v_max = ref.v_max + self.value = ref.value + elif ref.tag == 'id_number': + self.name = ref.name + self.value = ref.value + elif ref.tag == 'desc': + self.desc = ref.desc + elif ref.tag == 'type': + self.type = ref.type + elif ref.tag == 'enc': + self.enc = ref.enc + else: + assert (False), 'error: unexpected InstField.tag ' + ref.tag + self.tag = 'updated' + +class InstBlock(Statement): + def __init__(self): + self.keyword = 'inst' + self.tag = '' + self.name = '' + self.desc = '' + self.fields = [] + + def update(self, ref): + if ref.tag == 'name': + self.name = ref.name + elif ref.tag == 'desc': + self.desc = ref.desc + elif ref.tag == 'fields': + self.fields = ref.fields + else: + assert (False), 'error: unexpected InstBlock.tag ' + ref.tag + self.tag = 'updated' + +FmtToDetails = { + 'NUM_B8' : ( 'SregU8', 8 ), + 'NUM_I8' : ( 'SregI8', 8 ), + 'NUM_B16' : ( 'SregU16', 16 ), + 'NUM_F16' : ( 'SregF16', 16 ), + 'NUM_B32' : ( 'SregU32', 32 ), + 'NUM_F32' : ( 'SregF32', 32 ), + 'NUM_I32' : ( 'SregI32', 32 ), + 'NUM_U32' : ( 'SregU32', 32 ), + 'NUM_B64' : ( 'SregU64', 64 ), + 'NUM_I64' : ( 'SregI64', 64 ), + 'NUM_F64' : ( 'SregF64', 64 ), + 'NUM_U64' : ( 'SregU64', 64 ), + 'BUF' : ( 'SregU64', 64 ), + 'NUM_B96' : ( 'SregU96', 96 ), + 'NUM_B128' : ( 'SregU128', 128 ), + 'RSRC_SCRATCH' : ( 'SregU128', 128 ), + 'RSRC_SCALAR' : ( 'SregU128', 128 ), + 'IMG' : ( 'SregU256', 256 ), +} + +# which get method is used to retrieve +# or set data +TypeToAccessMethod = { + # scalar reg file get + 'SregU8' : 'ScalarReg', + 'SregI8' : 'ScalarReg', + 'SregU16' : 'ScalarReg', + 'SregI16' : 'ScalarReg', + 'SregU32' : 'ScalarReg', + 'SregI32' : 'ScalarReg', + 'SregU64' : 'ScalarReg', + 'SregI64' : 'ScalarReg', + 'SregU96' : 'ScalarReg', + 'SregU128' : 'ScalarReg', + 'SregU256' : 'ScalarReg', + 'SregU512' : 'ScalarReg', + 'SregF16' : 'ScalarReg', + 'SregF32' : 'ScalarReg', + 'SregF64' : 'ScalarReg', + 
+ # vector reg file get + 'VregU8' : 'VectorReg', + 'VregI8' : 'VectorReg', + 'VregU16' : 'VectorReg', + 'VregI16' : 'VectorReg', + 'VregU32' : 'VectorReg', + 'VregI32' : 'VectorReg', + 'VregU64' : 'VectorReg', + 'VregI64' : 'VectorReg', + 'VregU96' : 'VectorReg', + 'VregU128' : 'VectorReg', + 'VregU256' : 'VectorReg', + 'VregU512' : 'VectorReg', + 'VregF16' : 'VectorReg', + 'VregF32' : 'VectorReg', + 'VregF64' : 'VectorReg', +} + +SpecialCtx = { + 'SRC_NOLIT' : 'SrcReg', + 'SRC' : 'SrcReg', + 'SRC_NOLDS' : 'SrcReg', + 'SRC_SIMPLE' : 'SrcReg', + 'EXEC' : 'SpecialReg', + 'VCC' : 'SpecialReg', + 'SCC' : 'SpecialReg', + 'PC' : 'SpecialReg', + 'PRIV' : 'SpecialReg', + 'INST_ATC' : 'SpecialReg', + 'M0' : 'SpecialReg', + 'VSKIP' : 'SpecialReg', + 'PI' : 'SpecialReg', + 'NAN' : 'SpecialReg', + 'INF' : 'SpecialReg', + 'P0' : 'SpecialReg', + 'P10' : 'SpecialReg', + 'P20' : 'SpecialReg', + 'TBA' : 'SpecialReg', +} + +# for DS fixup +SuffixToFmt = { + '_I8' : 'NUM_I8', + 'I32' : 'NUM_I32', + 'I64' : 'NUM_I64' +} + +EncRegInfoToField = { + 'DS:ADDR:VGPR:vgpr_a' : 'ADDR', + 'MUBUF:ADDR:VGPR:vgpr_a' : 'VADDR', + 'MIMG:ADDR:VGPR:vgpr_a' : 'VADDR', +} + +RegInfoToField = { + 'D:SDST:sdst' : 'SDST', + 'D:SREG:sdst' : 'SDST', + 'S0:SDST:ssrc' : 'SDST', + 'S:SSRC:ssrc' : 'SSRC0', + 'S0:SREG:ssrc' : 'SSRC0', + 'S0:SSRC:ssrc' : 'SSRC0', + 'S0:SSRC:ssrc_0' : 'SSRC0', + 'S1:SSRC:ssrc_1' : 'SSRC1', + 'SGPR:SREG:sgpr' : '', + 'D:VGPR:vdst' : 'VDST', + 'D:VGPR:vgpr_dst' : 'VDST', + 'S0:SRC:src' : 'SRC0', + 'S0:SRC:src_0' : 'SRC0', + 'S0:SRC_NOLDS:src_0' : 'SRC0', + 'S0:SRC_NOLIT:src_0' : 'SRC0', + 'S0:SRC_NOLIT:src' : 'SRC0', + 'S0:SRC_VGPR:src' : 'SRC0', + 'S:SRC:src' : 'SRC0', + 'S:SRC_NOLIT:src' : 'SRC0', + 'S0:SRC_SIMPLE:src_0' : 'SRC0', + 'S1:SRC_SIMPLE:src_1' : 'SRC1', + 'S2:SRC_SIMPLE:src_2' : 'SRC2', + 'S1:VGPR:src_1' : 'VSRC1', + 'S:VGPR:vgpr_ij' : 'VSRC', + 'S:SRC_VGPR:vgpr_ij' : 'SRC0', + 'S0:SRC_VGPR:vgpr_ij' : 'SRC0', + 'S2:SRC_VGPR:vgpr_add' : 'SRC2', + 'S2:VGPR:src_2' : 
'VSRC1', + 'VGPR:VGPR:vgpr' : '', + 'VGPR:SRC:vgpr' : '', + 'VGPR:SRC_VGPR:vgpr' : '', + 'VGPR:SRC_NOLIT:vgpr' : '', + 'MEM:MEM:Mem()' : '', + 'A:VGPR:vgpr_a' : '', + 'B:VGPR:vgpr_a' : '', + 'ADDR:SREG:sgpr_base' : 'SBASE', + 'DATA:SMWR_OFFSET:offset' : 'OFFSET', + 'DATA:SREG:sgpr_data' : 'SDATA', + 'RETURN_DATA:SREG:sgpr_data' : 'SDATA', + 'ADDR_BASE:VGPR:vgpr_a' : 'ADDR', + 'DATA:VGPR:vgpr_d0' : 'DATA0', + 'DATA_0:VGPR:vgpr_d0' : 'DATA0', + 'DATA_1:VGPR:vgpr_d0' : 'DATA0', + 'DATA_2:VGPR:vgpr_d0' : 'DATA0', + 'RETURN_DATA:VGPR:vgpr_rtn' : 'VDST', + 'RETURN_DATA_0:VGPR:vgpr_rtn' : 'VDST', + 'RETURN_DATA_1:VGPR:vgpr_rtn' : 'VDST', + 'DATA2:VGPR:vgpr_d1' : 'DATA1', + 'DATA:SREG:sgpr_r' : 'SRSRC', + 'DATA_0:SREG:sgpr_r' : 'SRSRC', + 'DATA_1:SREG:sgpr_r' : 'SRSRC', + 'DATA_2:SREG:sgpr_r' : 'SRSRC', + 'DATA:SREG:sgpr_dst' : 'SDATA', + 'DATA:VGPR:vgpr_d' : 'VDATA', + 'DATA_0:VGPR:vgpr_d' : 'VDATA', + 'DATA_1:VGPR:vgpr_d' : 'VDATA', + 'DATA_2:VGPR:vgpr_d' : 'VDATA', + 'RETURN_DATA:VGPR:vgpr_d' : 'VDATA', + 'RETURN_DATA_0:VGPR:vgpr_d' : 'VDATA', + 'RETURN_DATA_1:VGPR:vgpr_d' : 'VDATA', + 'ADDR:VGPR:vgpr_addr' : 'ADDR', + 'ADDR:VGPR:vgpr_src' : 'ADDR', + 'DATA:VGPR:vgpr_src' : 'DATA', + 'DATA_0:VGPR:vgpr_src' : 'DATA', + 'DATA_1:VGPR:vgpr_src' : 'DATA', + 'DATA_2:VGPR:vgpr_src' : 'DATA', + 'DATA:VGPR:vgpr_dst' : 'VDST', + 'DATA_0:VGPR:vgpr_dst' : 'VDST', + 'DATA_1:VGPR:vgpr_dst' : 'VDST', + 'DATA_2:VGPR:vgpr_dst' : 'VDST', + 'RETURN_DATA:VGPR:vgpr_dst' : 'VDST', + 'RETURN_DATA_0:VGPR:vgpr_dst' : 'VDST', + 'RETURN_DATA_1:VGPR:vgpr_dst' : 'VDST', + 'D:VCC:vcc' : '-VCC', +} + +DataRegisters = ['DATA', 'DATA_0', 'DATA_1', 'DATA_2', 'RETURN_DATA', + 'RETURN_DATA_0', 'RETURN_DATA_1'] + +SpecRegToDetails = { + 'SCC' : ['SCC', '', 'scc', 'SregU32', '', ''], + 'M0' : ['M0', '', 'm0', 'SregU32', '', ''], + 'PRIV' : ['PRIV', '', 'priv', 'SregU32', '', ''], + 'VSKIP' : ['VSKIP', '', 'vskip', 'SregU32', '', ''], + 'INST_ATC' : ['INST_ATC', '', 'atc', 'SregU32', '', ''], + 'VCC' : 
['VCC', '', 'vcc', 'SregU64', '', ''], + 'PC' : ['PC', '', 'pc', 'SregU64', '', ''], + 'TBA' : ['TBA', '', 'tba', 'SregU64', '', ''], + 'EXEC' : ['EXEC', '', 'exec', 'SregU64', '', ''], + 'PI' : ['PI', '', 'pi', 'SregF32', '', ''], + 'INF' : ['INF', '', 'inf', 'SregF32', '', ''], + 'NAN' : ['NAN', '', 'nan', 'SregF32', '', ''], + 'P0' : ['P0', '', 'p0', 'SregF32', '', ''], + 'P10' : ['P10', '', 'p10', 'SregF32', '', ''], + 'P20' : ['P20', '', 'p20', 'SregF32', '', ''], + 'offset0' : ['', 'instData.OFFSET0', 'offset0', 'SregU16', '', ''], + 'OFFSET0' : ['', 'instData.OFFSET0', 'offset0', 'SregU16', '', ''], + 'offset1' : ['', 'instData.OFFSET1', 'offset1', 'SregU16', '', ''], + 'OFFSET1' : ['', 'instData.OFFSET1', 'offset1', 'SregU16', '', ''], + 'SIMM16' : ['', 'instData.SIMM16', 'simm16', 'SregI16', '', ''], + 'SIMM4' : ['', 'instData.SSRC1', 'simm4', 'SregU16', '', ''], + 'threadID' : ['', '', 't', 'SregU32', '', ''], + 'cmp' : ['', '', 'cmp', 'SregU64', '', ''], + 'src' : ['', '', 'src', 'SregU64', '', ''], + 'tmp' : ['', '', 'tmp', 'SregU64', '', ''], + 'attr_word' : ['ATTR', '', 'attr_word', 'SregU32', '', ''], + 'K' : ['', 'extData.imm_u32', 'k', 'SregU32', '', ''] +} + +CoreMethodMap = { + 'getSRC_NOLDS_U32' : 'getSRC_U32', + 'getSRC_NOLDS_I32' : 'getSRC_I32', + 'getSRC_NOLDS_F16' : 'getSRC_F16', + 'getSRC_NOLDS_U16' : 'getSRC_U16', + 'getSRC_NOLIT_F16' : 'getSRC_F16', + 'getSRC_SIMPLE_F16' : 'getSRC_F16', + 'getSRC_NOLIT_F32' : 'getSRC_F32', + 'getSRC_SIMPLE_F32' : 'getSRC_F32', + 'getSRC_NOLIT_F64' : 'getSRC_F64', + 'getSRC_SIMPLE_F64' : 'getSRC_F64', + 'getSRC_NOLIT_U16' : 'getSRC_U16', + 'getSRC_SIMPLE_U16' : 'getSRC_U16', + 'getSRC_NOLIT_U32' : 'getSRC_U32', + 'getSRC_SIMPLE_U32' : 'getSRC_U32', + 'getSRC_NOLIT_U64' : 'getSRC_U64', + 'getSRC_SIMPLE_U64' : 'getSRC_U64', + 'getSRC_NOLIT_I32' : 'getSRC_I32', + 'getSRC_SIMPLE_I32' : 'getSRC_I32', + 'getSRC_VGPR_F32' : 'getSRC_F32', + 'getSRC_SIMPLE_I64' : 'getSRC_I64' +} + +KnownExceptions = [ + 
'Inst_SOP2__S_CBRANCH_G_FORK', + 'Inst_SOPK__S_ADDK_I32', + 'Inst_SOPK__S_CBRANCH_I_FORK', + 'Inst_SOPK__S_GETREG_B32', + 'Inst_SOP1__S_CBRANCH_JOIN', + 'Inst_SOPP__S_CBRANCH_CDBGSYS', + 'Inst_SOPP__S_CBRANCH_CDBGUSER', + 'Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER', + 'Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER', + 'Inst_SOPP__S_SET_GPR_IDX_MODE', + 'Inst_VOP1__V_CVT_OFF_F32_I4', + 'Inst_VOP1__V_FFBH_U32', + 'Inst_VOP1__V_FFBL_B32', + 'Inst_VOP1__V_FFBH_I32', + 'Inst_VOPC__V_CMP_CLASS_F32', + 'Inst_VOPC__V_CMPX_CLASS_F32', + 'Inst_VOPC__V_CMP_CLASS_F64', + 'Inst_VOPC__V_CMPX_CLASS_F64', + 'Inst_VOPC__V_CMP_CLASS_F16', + 'Inst_VOPC__V_CMPX_CLASS_F16', + 'Inst_VINTRP__V_INTERP_MOV_F32', + 'Inst_VOP3__V_CMP_CLASS_F32', + 'Inst_VOP3__V_CMPX_CLASS_F32', + 'Inst_VOP3__V_CMP_CLASS_F64', + 'Inst_VOP3__V_CMPX_CLASS_F64', + 'Inst_VOP3__V_CMP_CLASS_F16', + 'Inst_VOP3__V_CMPX_CLASS_F16', + 'Inst_VOP3__V_CVT_OFF_F32_I4', + 'Inst_VOP3__V_FFBH_U32', + 'Inst_VOP3__V_FFBL_B32', + 'Inst_VOP3__V_FFBH_I32', + 'Inst_VOP3__V_CUBEID_F32', + 'Inst_VOP3__V_CUBESC_F32', + 'Inst_VOP3__V_CUBETC_F32', + 'Inst_VOP3__V_CUBEMA_F32', + 'Inst_VOP3__V_DIV_FIXUP_F32', + 'Inst_VOP3__V_DIV_FIXUP_F64', + 'Inst_VOP3__V_DIV_SCALE_F32', + 'Inst_VOP3__V_DIV_SCALE_F64', + 'Inst_VOP3__V_DIV_FMAS_F32', + 'Inst_VOP3__V_DIV_FMAS_F64', + 'Inst_VOP3__V_MSAD_U8', + 'Inst_VOP3__V_QSAD_PK_U16_U8', + 'Inst_VOP3__V_MQSAD_PK_U16_U8', + 'Inst_VOP3__V_MQSAD_U32_U8', + 'Inst_VOP3__V_PERM_B32', + 'Inst_VOP3__V_DIV_FIXUP_F16', + 'Inst_VOP3__V_CVT_PKACCUM_U8_F32', + 'Inst_VOP3__V_INTERP_MOV_F32', + 'Inst_VOP3__V_INTERP_P1LV_F16', + 'Inst_VOP3__V_MBCNT_LO_U32_B32', + 'Inst_VOP3__V_MBCNT_HI_U32_B32', + 'Inst_VOP3__V_TRIG_PREOP_F64', + 'Inst_VOP3__V_CVT_PKNORM_I16_F32', + 'Inst_VOP3__V_CVT_PKNORM_U16_F32', + 'Inst_VOP3__V_CVT_PKRTZ_F16_F32', + 'Inst_VOP3__V_CVT_PK_U16_U32', + 'Inst_VOP3__V_CVT_PK_I16_I32', + 'Inst_DS__DS_READ_U8', + 'Inst_DS__DS_READ_U16', + 'Inst_DS__DS_SWIZZLE_B32', + 'Inst_DS__DS_GWS_BARRIER', + 'Inst_DS__DS_CONSUME', 
+ 'Inst_DS__DS_APPEND', + 'Inst_DS__DS_ORDERED_COUNT', + 'Inst_MIMG__IMAGE_GATHER4', + 'Inst_MIMG__IMAGE_GATHER4_CL', + 'Inst_MIMG__IMAGE_GATHER4_L', + 'Inst_MIMG__IMAGE_GATHER4_B', + 'Inst_MIMG__IMAGE_GATHER4_B_CL', + 'Inst_MIMG__IMAGE_GATHER4_LZ', + 'Inst_MIMG__IMAGE_GATHER4_C', + 'Inst_MIMG__IMAGE_GATHER4_C_CL', + 'Inst_MIMG__IMAGE_GATHER4_C_L', + 'Inst_MIMG__IMAGE_GATHER4_C_B', + 'Inst_MIMG__IMAGE_GATHER4_C_B_CL', + 'Inst_MIMG__IMAGE_GATHER4_C_LZ' +] + +KnownEmpty = [ + 'Inst_SOPK__S_SETREG_B32', + 'Inst_SOPK__S_SETREG_IMM32_B32', + 'Inst_SOPP__S_WAKEUP', + 'Inst_SOPP__S_BARRIER', + 'Inst_SOPP__S_SETKILL', + 'Inst_SOPP__S_WAITCNT', + 'Inst_SOPP__S_SETHALT', + 'Inst_SOPP__S_SLEEP', + 'Inst_SOPP__S_SETPRIO', + 'Inst_SOPP__S_SENDMSG', + 'Inst_SOPP__S_SENDMSGHALT', + 'Inst_SOPP__S_ICACHE_INV', + 'Inst_SOPP__S_INCPERFLEVEL', + 'Inst_SOPP__S_DECPERFLEVEL', + 'Inst_SOPP__S_TTRACEDATA', + 'Inst_SOPP__S_SET_GPR_IDX_OFF', + 'Inst_SMEM__S_DCACHE_INV', + 'Inst_SMEM__S_DCACHE_WB', + 'Inst_SMEM__S_DCACHE_INV_VOL', + 'Inst_SMEM__S_DCACHE_WB_VOL', + 'Inst_SMEM__S_MEMTIME', + 'Inst_SMEM__S_MEMREALTIME', + 'Inst_SMEM__S_ATC_PROBE', + 'Inst_SMEM__S_ATC_PROBE_BUFFER', + 'Inst_VOP1__V_READFIRSTLANE_B32', + 'Inst_VOP1__V_FREXP_EXP_I32_F64', + 'Inst_VOP1__V_FREXP_MANT_F64', + 'Inst_VOP1__V_FRACT_F64', + 'Inst_VOP1__V_CLREXCP', + 'Inst_VOP1__V_RCP_F16', + 'Inst_VOP1__V_SQRT_F16', + 'Inst_VOP1__V_RSQ_F16', + 'Inst_VOP1__V_LOG_F16', + 'Inst_VOP1__V_EXP_F16', + 'Inst_VOP1__V_FREXP_MANT_F16', + 'Inst_VOP1__V_FREXP_EXP_I16_F16', + 'Inst_VOP3__V_FREXP_EXP_I32_F64', + 'Inst_VOP3__V_FREXP_MANT_F64', + 'Inst_VOP3__V_FRACT_F64', + 'Inst_VOP3__V_CLREXCP', + 'Inst_VOP3__V_RCP_F16', + 'Inst_VOP3__V_SQRT_F16', + 'Inst_VOP3__V_RSQ_F16', + 'Inst_VOP3__V_LOG_F16', + 'Inst_VOP3__V_EXP_F16', + 'Inst_VOP3__V_FREXP_MANT_F16', + 'Inst_VOP3__V_FREXP_EXP_I16_F16', + 'Inst_VOP3__V_READLANE_B32', + 'Inst_VOP3__V_WRITELANE_B32', + 'Inst_DS__DS_WRXCHG2_RTN_B32', + 'Inst_DS__DS_WRXCHG2ST64_RTN_B32', + 
'Inst_DS__DS_PERMUTE_B32', + 'Inst_DS__DS_BPERMUTE_B32', + 'Inst_DS__DS_WRXCHG2_RTN_B64', + 'Inst_DS__DS_WRXCHG2ST64_RTN_B64', + 'Inst_DS__DS_CONDXCHG32_RTN_B64', + 'Inst_DS__DS_GWS_SEMA_RELEASE_ALL', + 'Inst_DS__DS_GWS_INIT', + 'Inst_DS__DS_GWS_SEMA_V', + 'Inst_DS__DS_GWS_SEMA_BR', + 'Inst_DS__DS_GWS_SEMA_P', + 'Inst_DS__DS_READ_B96', + 'Inst_DS__DS_READ_B128', + 'Inst_MUBUF__BUFFER_LOAD_FORMAT_X', + 'Inst_MUBUF__BUFFER_LOAD_FORMAT_XY', + 'Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ', + 'Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW', + 'Inst_MUBUF__BUFFER_STORE_FORMAT_X', + 'Inst_MUBUF__BUFFER_STORE_FORMAT_XY', + 'Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ', + 'Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW', + 'Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X', + 'Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY', + 'Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ', + 'Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW', + 'Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X', + 'Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY', + 'Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ', + 'Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW', + 'Inst_MUBUF__BUFFER_LOAD_UBYTE', + 'Inst_MUBUF__BUFFER_LOAD_SBYTE', + 'Inst_MUBUF__BUFFER_LOAD_USHORT', + 'Inst_MUBUF__BUFFER_LOAD_SSHORT', + 'Inst_MUBUF__BUFFER_STORE_BYTE', + 'Inst_MUBUF__BUFFER_STORE_SHORT', + 'Inst_MUBUF__BUFFER_STORE_LDS_DWORD', + 'Inst_MUBUF__BUFFER_WBINVL1', + 'Inst_MUBUF__BUFFER_WBINVL1_VOL', + 'Inst_MTBUF__TBUFFER_LOAD_FORMAT_X', + 'Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY', + 'Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ', + 'Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW', + 'Inst_MTBUF__TBUFFER_STORE_FORMAT_X', + 'Inst_MTBUF__TBUFFER_STORE_FORMAT_XY', + 'Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ', + 'Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW', + 'Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X', + 'Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY', + 'Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ', + 'Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW', + 'Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X', + 'Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY', + 'Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ', 
+ 'Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW', + 'Inst_MIMG__IMAGE_LOAD', + 'Inst_MIMG__IMAGE_LOAD_MIP', + 'Inst_MIMG__IMAGE_LOAD_PCK', + 'Inst_MIMG__IMAGE_LOAD_PCK_SGN', + 'Inst_MIMG__IMAGE_LOAD_MIP_PCK', + 'Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN', + 'Inst_MIMG__IMAGE_STORE', + 'Inst_MIMG__IMAGE_STORE_MIP', + 'Inst_MIMG__IMAGE_STORE_PCK', + 'Inst_MIMG__IMAGE_STORE_MIP_PCK', + 'Inst_MIMG__IMAGE_GET_RESINFO', + 'Inst_MIMG__IMAGE_SAMPLE', + 'Inst_MIMG__IMAGE_SAMPLE_CL', + 'Inst_MIMG__IMAGE_SAMPLE_D', + 'Inst_MIMG__IMAGE_SAMPLE_D_CL', + 'Inst_MIMG__IMAGE_SAMPLE_L', + 'Inst_MIMG__IMAGE_SAMPLE_B', + 'Inst_MIMG__IMAGE_SAMPLE_B_CL', + 'Inst_MIMG__IMAGE_SAMPLE_LZ', + 'Inst_MIMG__IMAGE_SAMPLE_C', + 'Inst_MIMG__IMAGE_SAMPLE_C_CL', + 'Inst_MIMG__IMAGE_SAMPLE_C_D', + 'Inst_MIMG__IMAGE_SAMPLE_C_D_CL', + 'Inst_MIMG__IMAGE_SAMPLE_C_L', + 'Inst_MIMG__IMAGE_SAMPLE_C_B', + 'Inst_MIMG__IMAGE_SAMPLE_C_B_CL', + 'Inst_MIMG__IMAGE_SAMPLE_C_LZ', + 'Inst_MIMG__IMAGE_SAMPLE_O', + 'Inst_MIMG__IMAGE_SAMPLE_CL_O', + 'Inst_MIMG__IMAGE_SAMPLE_D_O', + 'Inst_MIMG__IMAGE_SAMPLE_D_CL_O', + 'Inst_MIMG__IMAGE_SAMPLE_L_O', + 'Inst_MIMG__IMAGE_SAMPLE_B_O', + 'Inst_MIMG__IMAGE_SAMPLE_B_CL_O', + 'Inst_MIMG__IMAGE_SAMPLE_LZ_O', + 'Inst_MIMG__IMAGE_SAMPLE_C_O', + 'Inst_MIMG__IMAGE_SAMPLE_C_CL_O', + 'Inst_MIMG__IMAGE_SAMPLE_C_D_O', + 'Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O', + 'Inst_MIMG__IMAGE_SAMPLE_C_L_O', + 'Inst_MIMG__IMAGE_SAMPLE_C_B_O', + 'Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O', + 'Inst_MIMG__IMAGE_SAMPLE_C_LZ_O', + 'Inst_MIMG__IMAGE_GATHER4_O', + 'Inst_MIMG__IMAGE_GATHER4_CL_O', + 'Inst_MIMG__IMAGE_GATHER4_L_O', + 'Inst_MIMG__IMAGE_GATHER4_B_O', + 'Inst_MIMG__IMAGE_GATHER4_B_CL_O', + 'Inst_MIMG__IMAGE_GATHER4_LZ_O', + 'Inst_MIMG__IMAGE_GATHER4_C_O', + 'Inst_MIMG__IMAGE_GATHER4_C_CL_O', + 'Inst_MIMG__IMAGE_GATHER4_C_L_O', + 'Inst_MIMG__IMAGE_GATHER4_C_B_O', + 'Inst_MIMG__IMAGE_GATHER4_C_B_CL_O', + 'Inst_MIMG__IMAGE_GATHER4_C_LZ_O', + 'Inst_MIMG__IMAGE_GET_LOD', + 'Inst_MIMG__IMAGE_SAMPLE_CD', + 
'Inst_MIMG__IMAGE_SAMPLE_CD_CL', + 'Inst_MIMG__IMAGE_SAMPLE_C_CD', + 'Inst_MIMG__IMAGE_SAMPLE_C_CD_CL', + 'Inst_MIMG__IMAGE_SAMPLE_CD_O', + 'Inst_MIMG__IMAGE_SAMPLE_CD_CL_O', + 'Inst_MIMG__IMAGE_SAMPLE_C_CD_O', + 'Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O', + 'Inst_EXP__EXP', + 'Inst_FLAT__FLAT_LOAD_UBYTE', + 'Inst_FLAT__FLAT_LOAD_SBYTE', + 'Inst_FLAT__FLAT_LOAD_USHORT', + 'Inst_FLAT__FLAT_LOAD_SSHORT', + 'Inst_FLAT__FLAT_STORE_BYTE', + 'Inst_FLAT__FLAT_STORE_SHORT', +] diff --git a/src/arch/gcn3/decoder.cc b/src/arch/amdgpu/gcn3/decoder.cc similarity index 99% rename from src/arch/gcn3/decoder.cc rename to src/arch/amdgpu/gcn3/decoder.cc index 706232548f..366b9fba02 100644 --- a/src/arch/gcn3/decoder.cc +++ b/src/arch/amdgpu/gcn3/decoder.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017 Advanced Micro Devices, Inc. + * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. * All rights reserved. * * For use for simulation and test purposes only @@ -31,12 +31,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "arch/gcn3/gpu_decoder.hh" - #include -#include "arch/gcn3/insts/gpu_static_inst.hh" -#include "arch/gcn3/insts/instructions.hh" +#include "arch/amdgpu/gcn3/gpu_decoder.hh" +#include "arch/amdgpu/gcn3/insts/gpu_static_inst.hh" +#include "arch/amdgpu/gcn3/insts/instructions.hh" namespace Gcn3ISA { diff --git a/src/arch/amdgpu/gcn3/description_objects.py b/src/arch/amdgpu/gcn3/description_objects.py new file mode 100644 index 0000000000..f174203a5e --- /dev/null +++ b/src/arch/amdgpu/gcn3/description_objects.py @@ -0,0 +1,367 @@ +# Copyright (c) 2015-2021 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
# Clause objects describe the pieces of a parsed instruction description.
# Every clause carries a `keyword` string identifying its kind, plus the
# kind-specific attributes initialised to empty defaults in __init__.
# Each __repr__ renders the clause as '[ kw: <keyword>, <field>: ..., ... ]'.

# base class: a clause with an empty keyword
class Clause(object):
    def __init__(self):
        self.keyword = ''

# free-form comment text; `content` is a list of the collected pieces
class CommentClause(Clause):
    def __init__(self):
        self.keyword = 'comment'
        self.content = []

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' content: %s ]' % repr(self.content)
        return text

# assignment: destination, operator and source
class AssignmentClause(Clause):
    def __init__(self):
        self.keyword = 'assignment'
        self.dst = None
        self.op = None
        self.src = None

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' dst: %s,' % repr(self.dst)
        text += ' op: %s,' % repr(self.op)
        text += ' src: %s ]' % repr(self.src)
        return text

# binary expression: left operand, operator, right operand
class BinaryClause(Clause):
    def __init__(self):
        self.keyword = 'binary'
        self.left = None
        self.op = None
        self.right = None

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' left: %s,' % repr(self.left)
        text += ' op: %s,' % repr(self.op)
        text += ' right: %s ]' % repr(self.right)
        return text

# unary expression: operator and its single operand
class UnaryClause(Clause):
    def __init__(self):
        self.keyword = 'unary'
        self.op = None
        self.oprnd = None

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' op: %s,' % repr(self.op)
        text += ' oprnd: %s ]' % repr(self.oprnd)
        return text

# conditional expression: condition plus the true and false alternatives
class ConditionalClause(Clause):
    def __init__(self):
        self.keyword = 'conditional'
        self.cond = None
        self.true = None
        self.false = None

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' cond: %s,' % repr(self.cond)
        text += ' true: %s,' % repr(self.true)
        text += ' false: %s ]' % repr(self.false)
        return text

# named variable
class VariableClause(Clause):
    def __init__(self):
        self.keyword = 'variable'
        self.name = ''

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' name: %s ]' % repr(self.name)
        return text

# numeric constant; `value` is an int (shown in hex) or a float
class ConstantClause(Clause):
    def __init__(self):
        self.keyword = 'constant'
        self.value = 0

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        # isinstance() is required here: comparing type(...) against the
        # string 'float' is never true, which sent float constants down
        # the '%x' path where the format operator rejects them.
        if isinstance(self.value, float):
            text += ' value: %f ]' % (self.value)
        else:
            text += ' value: 0x%x ]' % (self.value)

        return text

# data register reference: register, optional index, type and bit range;
# `typ` defaults to the placeholder pair [ 'default', -1 ]
class DataRegClause(Clause):
    def __init__(self):
        self.keyword = 'data_reg'
        self.reg = None
        self.idx = None
        self.typ = [ 'default', -1 ]
        self.rng = None

    def __repr__(self):
        # falsy typ/idx/rng values are all printed as 'None'
        text = '[ kw: %s,' % self.keyword
        text += ' reg: %s,' % repr(self.reg)
        if self.typ:
            text += ' typ: %s,' % repr(self.typ)
        else:
            text += ' typ: None,'
        if self.idx:
            text += ' idx: %s,' % repr(self.idx)
        else:
            text += ' idx: None,'
        if self.rng:
            text += ' rng: %s ]' % repr(self.rng)
        else:
            text += ' rng: None ]'
        return text

# function call: function name and argument list
class FunctionClause(Clause):
    def __init__(self):
        self.keyword = 'function'
        self.func = ''
        self.args = []

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' func: %s,' % repr(self.func)
        text += ' args: %s ]' % repr(self.args)
        return text

# complete if/then/else statement
class IfThenElseClause(Clause):
    def __init__(self):
        self.keyword = 'ifthenelse'
        self.cond = None
        self.then_stmt = None
        self.else_stmt = None

    def __repr__(self):
        # falsy members are printed as 'None'
        text = '[ kw: %s,' % self.keyword
        if self.cond:
            text += ' cond: %s,' % repr(self.cond)
        else:
            text += ' cond: None,'
        if self.then_stmt:
            text += ' then: %s,' % repr(self.then_stmt)
        else:
            text += ' then: None,'
        if self.else_stmt:
            text += ' else: %s ]' % repr(self.else_stmt)
        else:
            text += ' else: None ]'
        return text

# 'if' with its condition only
class IfClause(Clause):
    def __init__(self):
        self.keyword = 'if'
        self.cond = None

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        if self.cond:
            text += ' cond: %s ]' % repr(self.cond)
        else:
            text += ' cond: None ]'
        return text

# 'for' loop header: loop variable with start and end
class ForClause(Clause):
    def __init__(self):
        self.keyword = 'for'
        self.variable = None
        self.start = None
        self.end = None

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' variable: %s,' % repr(self.variable)
        text += ' start: %s,' % repr(self.start)
        text += ' end: %s ]' % repr(self.end)
        return text

# 'else' marker, no attributes beyond the keyword
class ElseClause(Clause):
    def __init__(self):
        self.keyword = 'else'

    def __repr__(self):
        text = '[ kw: %s ]' % self.keyword
        return text

# 'end' marker, no attributes beyond the keyword
class EndClause(Clause):
    def __init__(self):
        self.keyword = 'end'

    def __repr__(self):
        text = '[ kw: %s ]' % self.keyword
        return text

# 'elseif' with its condition
class ElseIfClause(Clause):
    def __init__(self):
        self.keyword = 'elseif'
        self.cond = None

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        if self.cond:
            text += ' cond: %s ]' % repr(self.cond)
        else:
            text += ' cond: None ]'
        return text

# tab marker wrapping an optional statement
class TabClause(Clause):
    def __init__(self):
        self.keyword = 'tab'
        self.stmt = None

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        if self.stmt:
            text += ' stmt: %s ]' % repr(self.stmt)
        else:
            text += ' stmt: None ]'
        return text

# group: a list of grouped items
class GroupClause(Clause):
    def __init__(self):
        self.keyword = 'group'
        self.group = []

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' group: %s ]' % repr(self.group)
        return text

# cast: target type and the variable being cast
class CastClause(Clause):
    def __init__(self):
        self.keyword = 'cast'
        self.typ = ''
        self.var = ''

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' typ: %s,' % repr(self.typ)
        text += ' var: %s ]' % repr(self.var)
        return text

# comma pair: left and right parts
class CommaClause(Clause):
    def __init__(self):
        self.keyword = 'comma'
        self.left = None
        self.right = None

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' left: %s,' % repr(self.left)
        text += ' right: %s ]' % repr(self.right)
        return text

# chain pair: left and right parts
class ChainClause(Clause):
    def __init__(self):
        self.keyword = 'chain'
        self.left = None
        self.right = None

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' left: %s,' % repr(self.left)
        text += ' right: %s ]' % repr(self.right)
        return text

# memory reference: memory name, address and optional range
class MemClause(Clause):
    def __init__(self):
        self.keyword = 'mem'
        self.mem = None
        self.addr = None
        self.rng = None

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' mem: %s,' % repr(self.mem)
        text += ' addr: %s,' % repr(self.addr)
        if self.rng:
            text += ' rng: %s ]' % repr(self.rng)
        else:
            text += ' rng: None ]'
        return text

# general purpose register reference: register, index and type
class GprClause(Clause):
    def __init__(self):
        self.keyword = 'gpr'
        self.gpr = None
        self.idx = None
        self.typ = None

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' gpr: %s,' % repr(self.gpr)
        text += ' idx: %s,' % repr(self.idx)
        text += ' typ: %s ]' % repr(self.typ)
        return text

# parenthesized expression
class ParenClause(Clause):
    def __init__(self):
        self.keyword = 'paren'
        self.parexp = None

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' parexp: %s ]' % repr(self.parexp)
        return text

# size; `size` must be an integer since it is formatted with %d
class SizeClause(Clause):
    def __init__(self):
        self.keyword = 'size'
        self.size = 0

    def __repr__(self):
        text = '[ kw: %s,' % self.keyword
        text += ' size: %d ]' % self.size
        return text

# maps a type suffix to a ( register type name, width in bits ) pair
TypeToDetails = {
    'f16'  : ( 'SregF16', 16 ),
    # NOTE(review): 'i16' maps to the unsigned SregU16 name, unlike
    # 'i32'/'i64' which map to signed names -- confirm this is intended
    'i16'  : ( 'SregU16', 16 ),
    'u16'  : ( 'SregU16', 16 ),
    'f'    : ( 'SregF32', 32 ),
    'f32'  : ( 'SregF32', 32 ),
    'i'    : ( 'SregI32', 32 ),
    'i32'  : ( 'SregI32', 32 ),
    'u'    : ( 'SregU32', 32 ),
    'u32'  : ( 'SregU32', 32 ),
    'd'    : ( 'SregF64', 64 ),
    'i64'  : ( 'SregI64', 64 ),
    'u64'  : ( 'SregU64', 64 ),
    'u96'  : ( 'SregU96', 96 ),
    'u128' : ( 'SregU128', 128 ),
    'u256' : ( 'SregU256', 256 ),
    'u512' : ( 'SregU512', 512 )
}
Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +import sys + +from description_objects import * +from m5.util.grammar import Grammar +from pprint import pprint, pformat + +class ParseError(Exception): + pass + +class DescriptionParser(Grammar): + def __init__(self): + super(DescriptionParser, self).__init__() + self.in_text = False + self.single_subs = [ + [ '1 if S0 is chosen as the minimum value.', + '(S0 < S1) ? 1 : 0;' ], + [ '1 if S0 is chosen as the maximum value.', + '(S0 > S1) ? 1 : 0;' ], + [ '1 if result is non-zero.', + '(D != 0) ? 1 : 0;' ], + [ '(SCC) then D.u = S0.u;', + 'if(SCC) then D.u = S0.u;' ], + [ 'SCC = 1 if the new value of EXEC is non-zero.', + 'SCC = (EXEC != 0) ? 
1 : 0;' ], + [ 'D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); ' + 'VOP3: specify VCC as a scalar GPR in S2.', + 'D.u = (VCC[threadID] ? S1.u : S0.u);' ], + [ 'D.u = QuadMask(S0.u):', + 'D.u = QuadMask(S0.u);' ], + [ 'D.u64 = QuadMask(S0.u64):', + 'D.u64 = QuadMask(S0.u64);' ], + [ 'A = ADDR_BASE;', + '' ], + [ 'B = A + 4*(offset1[7] ? {A[31],A[31:17]} : ' + '{offset1[6],offset1[6:0],offset0});', + '' ], + [ 'MEM_ADDR', + 'MEM' ], + [ 'Untyped buffer load dword.', + 'DATA.u32 = MEM[ADDR];' ], + [ 'Untyped buffer load 2 dwords.', + 'DATA.u64 = MEM[ADDR];' ], + [ 'Untyped buffer load 3 dwords.', + 'DATA.u96 = MEM[ADDR];' ], + [ 'Untyped buffer load 4 dwords.', + 'DATA.u128 = MEM[ADDR];' ], + [ 'Untyped buffer store dword.', + 'MEM[ADDR] = DATA.u32;' ], + [ 'Untyped buffer store 2 dwords.', + 'MEM[ADDR] = DATA.u64;' ], + [ 'Untyped buffer store 3 dwords.', + 'MEM[ADDR] = DATA.u96;' ], + [ 'Untyped buffer store 4 dwords.', + 'MEM[ADDR] = DATA.u128;' ], + [ 'Read 1 dword from scalar data cache.', + 'DATA.u32 = MEM[ADDR];' ], + [ 'Read 2 dwords from scalar data cache.', + 'DATA.u64 = MEM[ADDR];' ], + [ 'Read 4 dwords from scalar data cache.', + 'DATA.u128 = MEM[ADDR];' ], + [ 'Read 8 dwords from scalar data cache.', + 'DATA.u256 = MEM[ADDR];' ], + [ 'Read 16 dwords from scalar data cache.', + 'DATA.u512 = MEM[ADDR];' ], + [ 'Write 1 dword to scalar data cache.', + 'MEM[ADDR] = DATA.u32;' ], + [ 'Write 2 dwords to scalar data cache.', + 'MEM[ADDR] = DATA.u64;' ], + [ 'Write 4 dwords to scalar data cache.', + 'MEM[ADDR] = DATA.u128;' ], + [ 'D[0] = OR(S0[3:0]),', + '# D[0] = OR(S0[3:0]),' ], + [ '(unsigned 16-bit integer domain)', + 'in the unsigned 16-bit integer domain' ], + [ ' (unsigned compare);', + '; # (unsigned compare)' ], + [ '2s_complement', + 'TwosComplement' ], + [ 'floor(S0.f16) is even', + 'floor_is_even(S0.f16)' ], + [ 'D.u[31:0] = S0.u[0:31],', + 'D.u[31:0] = S0.u[0:31];' ], + [ 'D.d = trunc(S0.d), return', + 'D.d = trunc(S0.d); return' ], + [ 'D.f = 
trunc(S0.f), return', + 'D.f = trunc(S0.f); return' ], + [ 'D[i] = (S0[(i & ~3):(i | 3)] != 0)', + 'D = whole_quad_mode(S0)' ], + [ 'M0[15:12] = SIMM4.', + 'M0[15:12] = SIMM16.' ], + [ 'D = VCC in VOPC encoding.', + '# D = VCC in VOPC encoding.' ], + [ 'S0.u64 is a', + '# S0.u64 is a' ], + [ 'D.f = S0.f * K + S1.f; K is a', + 'D.f = S0.f * K + S2.f; # K is a' ], + [ 'D.f16 = S0.f16 * K.f16 + S1.f16; K is a', + 'D.f16 = S0.f16 * K.f16 + S2.f16; # K is a' ], + [ 'K is a', + '# K is a' ], + [ '(2 ** S1.i16)', + 'pow(2.0, S1.i16)' ], + [ 'original', + '# original' ], + [ 'attr_word selects', + '# attr_word selects' ], + [ '; S0 is ', + '; # S0 is ' ], + [ '. S0 is ', + '. # S0 is ' ], + [ 'SIMM16 = ', + '# SIMM16 = ' ], + [ 'SIMM16[3:0] = ', + '# SIMM16[3:0] = ' ], + [ 'SIMM16[6:4] = ', + '# SIMM16[6:4] = ' ], + [ 'SIMM16[12:8] = ', + '# SIMM16[12:8] = ' ], + [ 'SIMM16[9:0] contains', + '# SIMM16[9:0] contains' ], + [ 'Exponent', + 'exponent' ], + [ 'Mantissa', + 'mantissa' ], + [ 'Corrupted', + 'corrupted' ], + [ 'threadId', + 'threadID' ], + [ '16\'h0', + 'zeros(16)' ], + [ '24\'h0', + 'zeros(24)' ], + [ 'DATA.', + 'DATA;' ], + [ 'DATA2.', + 'DATA2;' ], + [ 'tmp.', + 'tmp;' ], + [ 'vcc_out', + 'VCC[threadID]' ], + [ '].\n', + '];\n' ], + [ '\\t tD', + '\\t D' ] + ] + self.repeat_subs = [ + ['MEM[A]', 'MEM[ADDR]'], + ['MEM[B]', 'MEM[ADDR, offset1, offset0]'], + ['DATA[0]', 'DATA_0'], + ['DATA[1]', 'DATA_1'], + ['DATA[0:1]', 'DATA_0'], + ['DATA[2:3]', 'DATA_2'], + ['0x800000000ULL', '0x100000000ULL'], + ['<>', '!='] + ] + + # lexer + states = ( + ('comment', 'exclusive'), + ('notes', 'exclusive'), + ) + + reserved = ( + 'A', + 'ABS', + 'ADDR', + 'ADDR_BASE', + 'ATTR_WORD', + 'APPROXIMATE2TOX', + 'APPROXIMATELOG2', + 'APPROXIMATERECIP', + 'APPROXIMATERECIPSQRT', + 'APPROXIMATESQRT', + 'B', + 'CMP', + 'COUNTONEBITS', + 'COUNTZEROBITS', + 'CORRUPTED', + 'COS', + 'D', + 'DATA', + 'DATA2', + 'DATA_0', + 'DATA_1', + 'DATA_2', + 'DOUBLE', + 'ELSE', + 'ELSIF', + 'END', + 
'EXEC', + 'EXPONENT', + 'F', + 'F16', + 'F32', + 'FINDFIRSTONE', + 'FINDFIRSTZERO', + 'FIRSTOPPOSITESIGNBIT', + 'FLOAT', + 'FLOOR', + 'FLOOR_IS_EVEN', + 'FLT16_TO_FLT32', + 'FLT16_TO_INT16', + 'FLT16_TO_UINT16', + 'FLT32_TO_FLT16', + 'FLT32_TO_FLT64', + 'FLT32_TO_INT32', + 'FLT32_TO_UINT32', + 'FLT32_TO_UINT8', + 'FLT64_TO_INT32', + 'FLT64_TO_FLT32', + 'FLT64_TO_UINT32', + 'FRACT', + 'I', + 'I16', + 'I32', + 'INF', + 'INT', + 'INT16_TO_FLT16', + 'INT32_FLOOR', + 'INT32_TO_FLT32', + 'INT32_TO_FLT64', + 'I64', + 'IF', + 'INST_ATC', + 'ISNAN', + 'K', + 'LOG2', + 'MANTISSA', + 'MAX', + 'MEDIAN', + 'MEM', + 'MIN', + 'M0', + 'NAN', + 'NOP', + 'OFFSET0', + 'OFFSET1', + 'OPCODE_SIZE_IN_BITS', + 'PC', + 'PI', + 'POW', + 'POWER2', + 'PRIV', + 'P0', + 'P10', + 'P20', + 'QUADMASK', + 'RETURN_DATA', + 'RETURN_DATA_0', + 'RETURN_DATA_1', + 'ROUND_NEAREST_EVEN', + 'QUOTE', + 'S', + 'S0', + 'S1', + 'S2', + 'SAD_U8', + 'SCC', + 'SGPR', + 'SIGNEXT', + 'SIN', + 'SIMM16', + 'SIMM4', + 'SNORM', + 'SQRT', + 'SRC', + 'TBA', + 'THEN', + 'THREADID', + 'TMP', + 'TRUNC', + 'TWOSCOMPLEMENT', + 'U', + 'U16', + 'U32', + 'U64', + 'U96', + 'U128', + 'U256', + 'U512', + 'UINT16_TO_FLT16', + 'UINT32_TO_FLT32', + 'UINT32_TO_FLT64', + 'UINT8_TO_FLT32', + 'UNORM', + 'UNSIGNED', + 'VCC', + 'VGPR', + 'VSKIP', + 'WHOLE_QUAD_MODE', + 'ZEROS' + ) + + tokens = reserved + ( + 'ADD', + 'ADDEQ', + 'ADDADD', + 'AND', + 'ANDAND', + 'ANDEQ', + 'COLON', + 'COMMA', + 'COMMENT', + 'DIVEQ', + 'DIV', + 'DO', + 'DOT', + 'DOTDOTDOT', + 'EQ', + 'EQUALS', + 'FOR', + 'GE', + 'GT', + 'IN', + 'INV', + 'LBRACE', + 'LBRACKET', + 'LE', + 'LG', + 'LPAREN', + 'LSH', + 'LSHEQ', + 'LT', + 'MODEQ', + 'MOD', + 'MUL', + 'MULEQ', + 'MULMUL', + 'NE', + 'NEWLINE', + 'NOT', + 'NUMBER', + 'OR', + 'OREQ', + 'OROR', + 'RBRACE', + 'RBRACKET', + 'RPAREN', + 'RSH', + 'RSHEQ', + 'QUESTION', + 'SEMI', + 'SUB', + 'SUBEQ', + 'SUBSUB', + 'TAB', + 'XOR', + 'XOREQ', + 'ID', + ) + + t_ADD = r'\+' + t_ADDADD = r'\+\+' + t_ADDEQ = r'\+=' + t_AND = r'&' + 
t_ANDAND = r'&&' + t_ANDEQ = r'&=' + t_COLON = r':' + t_COMMA = r',' + t_DIV = r'/' + t_DIVEQ = r'/=' + t_DOT = r'\.' + t_DOTDOTDOT = r'\.\.\.' + t_EQ = r'==' + t_EQUALS = r'=' + t_GE = r'>=' + t_GT = r'>' + t_INV = r'~' + t_LBRACE = r'\{' + t_LBRACKET = r'\[' + t_LE = r'<=' + t_LPAREN = r'\(' + t_LSH = r'<<' + t_LSHEQ = r'<<=' + t_LT = r'<' + t_LG = r'<>' + t_MOD = r'%' + t_MODEQ = r'%=' + t_MUL = r'\*' + t_MULMUL = r'\*\*' + t_NE = r'!=' + t_NOT = r'!' + t_OR = r'\|' + t_OREQ = r'\|=' + t_OROR = r'\|\|' + t_RBRACE = r'\}' + t_RBRACKET = r'\]' + t_RPAREN = r'\)' + t_RSH = r'>>' + t_RSHEQ = r'>>=' + t_QUESTION = r'\?' + t_QUOTE = r'\'' + t_SEMI = r';' + t_SUB = r'-' + t_SUBEQ = r'-=' + t_SUBSUB = r'--' + t_TAB = r'\\t' + t_XOR = r'\^' + t_XOREQ = r'\^=' + + reserved_map = { } + for r in reserved: + reserved_map[r.lower()] = r + + def t_HASH(self, t): + r'\#' + t.lexer.begin('comment') + text = '' + while True: + tok = t.lexer.token() + if not tok: + break; + text += tok.value + if tok.type == 'NEWLINE': + break; + t.lexer.begin('INITIAL') + t.type = 'COMMENT' + t.value = text + return t + + def t_DIVDIV(self, t): + r'//' + t.lexer.begin('comment') + text = '' + while True: + tok = t.lexer.token() + if not tok: + break; + text += tok.value + if tok.type == 'NEWLINE': + break; + t.lexer.begin('INITIAL') + t.type = 'COMMENT' + t.value = text + return t + + def t_CODE(self, t): + r'@code' + t.lexer.begin('INITIAL') + pass + + def t_TEXT(self, t): + r'@text' + t.lexer.begin('comment') + text = '' + while True: + tok = t.lexer.token() + if not tok: + break; + text += tok.value + t.lexer.begin('INITIAL') + t.type = 'COMMENT' + t.value = text + return t + + def t_ID(self, t): + r'[a-zA-Z_][a-zA-Z0-9_]*' + if t.value[0].isupper() and t.value[1:].islower(): + t.lexer.begin('comment') + else: + t.type = self.reserved_map.get(t.value.lower(), 'ID') + if t.type == 'ID': + t.lexer.begin('notes') + return t + + # comment state lexer tokens + # --- consume everything up to the 
next NEWLINE + def t_comment_COMMENT(self, t): + r'[^\n]+' + return t + + def t_comment_NEWLINE(self, t): + r'\n+' + t.lexer.lineno += t.value.count('\n') + t.lexer.begin('INITIAL') + return t + + t_comment_ignore = ' \t\x0c' + + def t_comment_error(self, t): + sys.exit('%d: illegal character "%s"' % (t.lexer.lineno, t.value[0])) + + # notes state lexer tokens + # -- consume everything but '(' and ')' up to the next NEWLINE + def t_notes_COMMENT(self, t): + r'[^\(\)\n]+' + return t + + def t_notes_LPAREN(self, t): + r'\(' + t.lexer.begin('INITIAL') + return t + + def t_notes_RPAREN(self, t): + r'\)' + t.lexer.begin('INITIAL') + return t + + def t_notes_NEWLINE(self, t): + r'\n+' + t.lexer.lineno += t.value.count('\n') + t.lexer.begin('INITIAL') + return t + + t_notes_ignore = ' \t\x0c' + + def t_notes_error(self, t): + sys.exit('%d: illegal character "%s"' % (t.lexer.lineno, t.value[0])) + + def t_NEWLINE(self, t): + r'\n+' + t.lexer.lineno += t.value.count('\n') + return t + + def t_HEXADECIMAL(self, t): + r'0[xX][0-9a-fA-F]+(ULL)?' + t.value = int(t.value.replace('ULL', ''), 16) + t.type = 'NUMBER' + return t + + def t_FLOAT(self, t): + r'[0-9][0-9]*\.[0-9][0-9]*(f)?' + t.value = float(t.value.replace('f', '')) + t.type = 'NUMBER' + return t + + def t_DECIMAL(self, t): + r'[0-9][0-9]*(ULL)?' 
+ t.value = int(t.value.replace('ULL', ''), 10) + t.type = 'NUMBER' + return t + + t_ignore = ' \t\x0c' + + def t_error(self, t): + sys.exit('%d: illegal character "%s"' % (t.lexer.lineno, t.value[0])) + + + ## parser + start = 'specification' + + precedence = ( + ('left', 'OROR', 'ANDAND'), + ('left', 'OR', 'XOR', 'AND'), + ('left', 'EQ', 'NE'), + ('left', 'GE', 'GT', 'LE', 'LG', 'LT'), + ('left', 'LSH', 'RSH'), + ('left', 'ADD', 'SUB'), + ('left', 'DIV', 'MUL'), + ('left', 'MOD', 'MULMUL') + ) + + # empty rule + def p_empty_0(self, t): + ''' + empty : + ''' + pass + + # comment rule + def p_comment_0(self, t): + ''' + comment : COMMENT + | ID + | QUOTE + ''' + clause = CommentClause() + clause.content = [t[1]] + t[0] = clause + + def p_comment_1(self, t): + ''' + comment : LPAREN comment RPAREN + ''' + clause = CommentClause() + clause.content = [t[1], t[2], t[3]] + t[0] = clause + + def p_comment_2(self, t): + ''' + comment : LPAREN comment RPAREN COMMA + ''' + clause = CommentClause() + clause.content = [t[1], t[2], t[3], t[4]] + t[0] = clause + + def p_comment_3(self, t): + ''' + comment : comment COMMENT + ''' + t[1].content.append(t[2]) + t[0] = t[1] + + # data_reg rules + def p_data_reg_0(self, t): + ''' + data_reg : D + | S + | S0 + | S1 + | S2 + ''' + t[0] = t[1] + + # spec_reg rules + def p_spec_reg_0(self, t): + ''' + spec_reg : A + | ADDR + | ADDR_BASE + | ATTR_WORD + | B + | CMP + | DATA + | DATA2 + | DATA_0 + | DATA_1 + | DATA_2 + | EXEC + | I + | INF + | INST_ATC + | K + | M0 + | NAN + | OFFSET0 + | OFFSET1 + | OPCODE_SIZE_IN_BITS + | P0 + | P10 + | P20 + | PC + | PI + | PRIV + | RETURN_DATA + | RETURN_DATA_0 + | RETURN_DATA_1 + | SCC + | SIMM16 + | SIMM4 + | SRC + | TBA + | THREADID + | TMP + | VCC + | VSKIP + ''' + t[0] = t[1] + + # function rules + def p_function_0(self, t): + ''' + function : ABS + | APPROXIMATE2TOX + | APPROXIMATELOG2 + | APPROXIMATERECIP + | APPROXIMATERECIPSQRT + | APPROXIMATESQRT + | COUNTONEBITS + | COUNTZEROBITS + | 
CORRUPTED + | COS + | EXPONENT + | FINDFIRSTONE + | FINDFIRSTZERO + | FIRSTOPPOSITESIGNBIT + | FLOOR + | FLOOR_IS_EVEN + | FLT16_TO_FLT32 + | FLT16_TO_INT16 + | FLT16_TO_UINT16 + | FLT32_TO_FLT16 + | FLT32_TO_FLT64 + | FLT32_TO_INT32 + | FLT32_TO_UINT32 + | FLT32_TO_UINT8 + | FLT64_TO_FLT32 + | FLT64_TO_INT32 + | FLT64_TO_UINT32 + | FRACT + | INT16_TO_FLT16 + | INT32_FLOOR + | INT32_TO_FLT32 + | INT32_TO_FLT64 + | ISNAN + | LOG2 + | MANTISSA + | MAX + | MEDIAN + | MIN + | POWER2 + | QUADMASK + | POW + | ROUND_NEAREST_EVEN + | SAD_U8 + | SIGNEXT + | SIN + | SQRT + | TRUNC + | TWOSCOMPLEMENT + | UINT16_TO_FLT16 + | UINT32_TO_FLT32 + | UINT32_TO_FLT64 + | UINT8_TO_FLT32 + | WHOLE_QUAD_MODE + | ZEROS + ''' + t[0] = t[1] + + # data_type rules + def p_data_type_0(self, t): + ''' + data_type : empty + ''' + t[0] = ['empty', -1] + + def p_data_type_1(self, t): + ''' + data_type : DOT I16 + | DOT F16 + | DOT U16 + ''' + t[0] = [t[2], 16] + + def p_data_type_2(self, t): + ''' + data_type : DOT I + | DOT F + | DOT U + | DOT I32 + | DOT F32 + | DOT U32 + ''' + t[0] = [t[2], 32] + + def p_data_type_3(self, t): + ''' + data_type : DOT D + | DOT I64 + | DOT U64 + ''' + t[0] = [t[2], 64] + + def p_data_type_4(self, t): + ''' + data_type : DOT U96 + ''' + t[0] = [t[2], 96] + + def p_data_type_5(self, t): + ''' + data_type : DOT U128 + ''' + t[0] = [t[2], 128] + + def p_data_type_6(self, t): + ''' + data_type : DOT U256 + ''' + t[0] = [t[2], 256] + + def p_data_type_7(self, t): + ''' + data_type : DOT U512 + ''' + t[0] = [t[2], 512] + + # data_range rules + def p_data_range_0(self, t): + ''' + data_range : empty + ''' + t[0] = None + + def p_data_range_1(self, t): + ''' + data_range : LBRACKET expression RBRACKET + ''' + t[0] = t[2] + + def p_data_range_2(self, t): + ''' + data_range : LBRACKET expression COLON expression RBRACKET + ''' + t[0] = [t[2], t[4]] + + # reg_desc rules + def p_reg_desc_0(self, t): + ''' + reg_desc : data_reg data_type data_range + ''' + clause = 
DataRegClause() + clause.reg = t[1] + clause.typ = t[2] + clause.rng = t[3] + t[0] = clause + + def p_reg_desc_1(self, t): + ''' + reg_desc : spec_reg data_range data_type data_range + ''' + clause = DataRegClause() + clause.reg = t[1] + clause.typ = t[3] + if t[1] == 'SGPR' or t[1] == 'VGPR' or t[1] == 'MEM': + clause.idx = t[2] + clause.rng = t[4] + elif t[4]: + clause.idx = t[2] + clause.rng = t[4] + else: + clause.idx = None + clause.rng = t[2] + t[0] = clause + + def p_reg_desc_2(self, t): + ''' + reg_desc : LBRACE reg_desc COMMA reg_desc RBRACE + ''' + clause = GroupClause() + clause.group = [t[2], t[4]] + t[0] = clause + + def p_reg_desc_3(self, t): + ''' + reg_desc : LBRACE reg_desc COMMA reg_desc COMMA reg_desc RBRACE + ''' + clause = GroupClause() + clause.group = [t[2], t[4], t[6]] + t[0] = clause + + def p_reg_desc_4(self, t): + ''' + reg_desc : LBRACE reg_desc COMMA reg_desc COMMA reg_desc \ + COMMA reg_desc RBRACE + ''' + clause = GroupClause() + clause.group = [t[2], t[4], t[6], t[8]] + t[0] = clause + + def p_mem_desc_0(self, t): + ''' + mem_desc : MEM LBRACKET clist RBRACKET data_range + ''' + clause = MemClause() + clause.mem = t[1] + clause.addr = t[3] + clause.rng = t[5] + t[0] = clause + + def p_mem_desc_1(self, t): + ''' + mem_desc : LBRACE mem_desc COMMA mem_desc RBRACE + ''' + clause = GroupClause() + clause.group = [t[2], t[4]] + t[0] = clause + + def p_mem_desc_2(self, t): + ''' + mem_desc : LBRACE mem_desc COMMA mem_desc COMMA mem_desc RBRACE + ''' + clause = GroupClause() + clause.group = [t[2], t[4], t[6]] + t[0] = clause + + def p_mem_desc_3(self, t): + ''' + mem_desc : LBRACE mem_desc COMMA mem_desc COMMA mem_desc \ + COMMA mem_desc RBRACE + ''' + clause = GroupClause() + clause.group = [t[2], t[4], t[6], t[8]] + t[0] = clause + + def p_gpr_desc_0(self, t): + ''' + gpr_desc : SGPR LBRACKET expression RBRACKET data_type + | VGPR LBRACKET expression RBRACKET data_type + ''' + clause = GprClause() + clause.gpr = t[1] + clause.idx = t[3] 
+ clause.typ = t[5] + t[0] = clause + + # clist rules + def p_clist_0(self, t): + ''' + clist : empty + ''' + pass + + def p_clist_1(self, t): + ''' + clist : expression + ''' + t[0] = t[1] + + def p_clist_2(self, t): + ''' + clist : clist COMMA expression + ''' + clause = CommaClause() + clause.left = t[1] + clause.right = t[3] + t[0] = clause + + # operand rules + def p_operand_0(self, t): + ''' + operand : NUMBER + ''' + clause = ConstantClause() + clause.value = t[1] + t[0] = clause + + def p_operand_1(self, t): + ''' + operand : reg_desc + | mem_desc + | gpr_desc + ''' + t[0] = t[1] + + def p_operand_2(self, t): + ''' + operand : function LPAREN clist RPAREN + ''' + clause = FunctionClause() + clause.func = t[1] + clause.args = t[3] + t[0] = clause + + + # unary rules + def p_unary_0(self, t): + ''' + unary : operand + ''' + t[0] = t[1] + + def p_unary_1(self, t): + ''' + unary : ADD expression + | INV expression + | NOT expression + | SUB expression + | ADDADD expression + | SUBSUB expression + ''' + clause = UnaryClause() + clause.op = t[1] + clause.oprnd = t[2] + t[0] = clause + + def p_unary_2(self, t): + ''' + unary : LPAREN expression RPAREN + ''' + clause = ParenClause() + clause.parexp = t[2] + t[0] = clause + + def p_unary_3(self, t): + ''' + unary : NOP + ''' + clause = FunctionClause() + clause.func = t[1] + clause.arg = None + t[0] = clause + + def p_type_name_0(self, t): + ''' + type_name : DOUBLE + | FLOAT + | INT + | UNSIGNED + | SNORM + | UNORM + ''' + t[0] = t[1] + + def p_cast_0(self, t): + ''' + cast : unary + ''' + t[0] = t[1] + + def p_cast_1(self, t): + ''' + cast : LPAREN type_name RPAREN cast + ''' + clause = CastClause() + clause.typ = t[2] + clause.var = t[4] + t[0] = clause + + def p_binary_0(self, t): + ''' + binary : cast + ''' + t[0] = t[1] + + def p_binary_1(self, t): + ''' + binary : binary MUL binary + | binary DIV binary + | binary MULMUL binary + | binary MOD binary + | binary ADD binary + | binary SUB binary + | binary LSH 
binary + | binary RSH binary + | binary LE binary + | binary LG binary + | binary LT binary + | binary GE binary + | binary GT binary + | binary EQ binary + | binary NE binary + | binary OR binary + | binary XOR binary + | binary AND binary + | binary ANDAND binary + | binary OROR binary + ''' + clause = BinaryClause() + clause.left = t[1] + clause.op = t[2] + clause.right = t[3] + t[0] = clause + + # conditional rules + def p_conditional_0(self, t): + ''' + conditional : binary + ''' + t[0] = t[1] + + def p_conditional_1(self, t): + ''' + conditional : binary QUESTION expression COLON conditional + ''' + clause = ConditionalClause() + clause.cond = t[1] + clause.true = t[3] + clause.false = t[5] + t[0] = clause + + # assignment rules + def p_assignment_0(self, t): + ''' + assignment : conditional + ''' + t[0] = t[1] + + def p_assignment_1(self, t): + ''' + assignment : operand EQUALS assignment + | operand ADDEQ assignment + | operand ANDEQ assignment + | operand DIVEQ assignment + | operand LSHEQ assignment + | operand MODEQ assignment + | operand MULEQ assignment + | operand OREQ assignment + | operand RSHEQ assignment + | operand SUBEQ assignment + | operand XOREQ assignment + ''' + clause = AssignmentClause() + clause.dst = t[1] + clause.op = t[2] + clause.src = t[3] + t[0] = clause + + # ifthenelse rules + def p_then_stmt_0(self, t): + ''' + then_stmt : empty + ''' + t[0] = [] + + def p_then_stmt_1(self, t): + ''' + then_stmt : statement + ''' + t[0] = t[1] + + def p_then_stmt_2(self, t): + ''' + then_stmt : THEN statement + ''' + t[0] = t[2] + + def p_then_stmt_3(self, t): + ''' + then_stmt : THEN statement terminator + ''' + t[0] = t[2] + + def p_else_stmt_0(self, t): + ''' + else_stmt : empty + ''' + t[0] = [] + + def p_else_stmt_1(self, t): + ''' + else_stmt : ELSE statement + ''' + t[0] = t[2] + + def p_else_stmt_2(self, t): + ''' + else_stmt : ELSE statement terminator + ''' + t[0] = t[2] + + # expression rules + def p_expression_0(self, t): + ''' + 
expression : assignment + ''' + t[0] = t[1] + + # terminator rules + def p_terminator_0(self, t): + ''' + terminator : NEWLINE + | SEMI + | DOT + | comment + ''' + if type(t[1]) is CommentClause: + clause = t[1] + else: + clause = CommentClause() + clause.content = [ t[1] ] + t[0] = clause + + # statement rules + def p_statement_0(self, t): + ''' + statement : assignment terminator + ''' + t[0] = [t[1], t[2]] + + def p_statement_2(self, t): + ''' + statement : assignment COMMA assignment + ''' + clause = ChainClause() + clause.left = t[1] + clause.right = t[3] + t[0] = [clause] + + def p_statement_3(self, t): + ''' + statement : IF LPAREN conditional RPAREN then_stmt else_stmt + ''' + clause = IfThenElseClause() + clause.cond = t[3] + clause.then_stmt = t[5] + clause.else_stmt = t[6] + t[0] = [clause] + + def p_statement_4(self, t): + ''' + statement : IF LPAREN conditional RPAREN terminator + ''' + clause = IfClause() + clause.cond = t[3] + t[0] = [clause] + + def p_statement_5(self, t): + ''' + statement : ELSE terminator + ''' + clause = ElseClause() + t[0] = [clause] + + def p_statement_6(self, t): + ''' + statement : ELSIF LPAREN conditional RPAREN terminator + ''' + clause = ElseIfClause() + clause.cond = t[3] + t[0] = [clause] + + def p_statement_7(self, t): + ''' + statement : FOR I IN expression DOTDOTDOT expression DO terminator + ''' + clause = ForClause() + clause.variable = t[2] + clause.start = t[4] + clause.end = t[6] + t[0] = [clause] + + def p_statement_8(self, t): + ''' + statement : END terminator + ''' + clause = EndClause() + t[0] = [clause] + + def p_statement_9(self, t): + ''' + statement : TAB statement terminator + ''' + clause = TabClause() + clause.stmt = t[2] + t[0] = [clause] + + def p_statement_10(self, t): + ''' + statement : NUMBER B COLON + | NUMBER B DOT + ''' + clause = SizeClause() + clause.size = t[1] + t[0] = [clause] + + def p_statement_11(self, t): + ''' + statement : terminator + ''' + t[0] = [t[1]] + + # statements rules + 
def p_statements_0(self, t): + ''' + statements : statement + ''' + t[0] = t[1] + + def p_statements_1(self, t): + ''' + statements : statements statement + ''' + t[1].extend(t[2]) + t[0] = t[1] + + # specification rule + def p_specification(self, t): + ''' + specification : statements + ''' + t[0] = t[1] + + + # error rule + def p_error(self, t): + if t: + # sys.exit('%d: syntax error at "%s"' % (t.lexer.lineno, t.value)) + pass + else: + # sys.exit('unknown syntax error') + pass + self.yacc.errok() + raise ParseError('p_error') + + # end rules + def cleanup_substitute(self, desc): + for sub in self.repeat_subs: + if sub[0] in desc: + desc = desc.replace(sub[0], sub[1]) + for sub in self.single_subs: + if sub[0] in desc: + return desc.replace(sub[0], sub[1]) + return desc + + def cleanup_description(self, desc_list): + edited = [] + for d in desc_list: + if d: + lines = d.split('\\n') + for l in lines: + if l: + e = self.cleanup_substitute(l + '\n') + edited.append(e) + return ''.join(edited) + + def parse_description(self, desc_list): + ''' + Parse the description strings + ''' + assert(type(desc_list) is list) + self.in_text = False + desc_string = self.cleanup_description(desc_list) + # import pdb; pdb.set_trace() + # print 'desc_string=%s' % repr(desc_string) + return self.parse_string(desc_string, '', debug=False) diff --git a/src/arch/gcn3/gpu_decoder.hh b/src/arch/amdgpu/gcn3/gpu_decoder.hh similarity index 99% rename from src/arch/gcn3/gpu_decoder.hh rename to src/arch/amdgpu/gcn3/gpu_decoder.hh index 0cd668b3a4..52bd222f74 100644 --- a/src/arch/gcn3/gpu_decoder.hh +++ b/src/arch/amdgpu/gcn3/gpu_decoder.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017 Advanced Micro Devices, Inc. + * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. * All rights reserved. 
* * For use for simulation and test purposes only @@ -37,7 +37,7 @@ #include #include -#include "arch/gcn3/gpu_types.hh" +#include "arch/amdgpu/gcn3/gpu_types.hh" class GPUStaticInst; diff --git a/src/arch/gcn3/gpu_isa.hh b/src/arch/amdgpu/gcn3/gpu_isa.hh similarity index 97% rename from src/arch/gcn3/gpu_isa.hh rename to src/arch/amdgpu/gcn3/gpu_isa.hh index 91eed99b4d..5d905569aa 100644 --- a/src/arch/gcn3/gpu_isa.hh +++ b/src/arch/amdgpu/gcn3/gpu_isa.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 Advanced Micro Devices, Inc. + * Copyright (c) 2016-2021 Advanced Micro Devices, Inc. * All rights reserved. * * For use for simulation and test purposes only @@ -37,7 +37,7 @@ #include #include -#include "arch/gcn3/registers.hh" +#include "arch/amdgpu/gcn3/gpu_registers.hh" #include "gpu-compute/dispatcher.hh" #include "gpu-compute/hsa_queue_entry.hh" #include "gpu-compute/misc.hh" diff --git a/src/arch/amdgpu/gcn3/gpu_isa_main.py b/src/arch/amdgpu/gcn3/gpu_isa_main.py new file mode 100644 index 0000000000..493372db7a --- /dev/null +++ b/src/arch/amdgpu/gcn3/gpu_isa_main.py @@ -0,0 +1,46 @@ +# Copyright (c) 2015-2021 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +import sys + +sys.path.append('../../../../gem5/ext/ply') +sys.path.append('../../../../gem5/src/python') + +from gpu_isa_parser import GpuIsaParser +from ast_interpreter import AstInterpreter + +# Get args from command line. +# Args are: +if __name__ == '__main__': + ast = GpuIsaParser(sys.argv[1], sys.argv[2]).parse_isa_desc() + interpreter = AstInterpreter() + interpreter.process_statements(ast) + interpreter.generate_code(sys.argv[2]) diff --git a/src/arch/amdgpu/gcn3/gpu_isa_parser.py b/src/arch/amdgpu/gcn3/gpu_isa_parser.py new file mode 100644 index 0000000000..3830c63062 --- /dev/null +++ b/src/arch/amdgpu/gcn3/gpu_isa_parser.py @@ -0,0 +1,1263 @@ +# Copyright (c) 2015-2021 Advanced Micro Devices, Inc. +# All rights reserved. +# +# For use for simulation and test purposes only +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
+ +import re +import sys + +from ast_objects import * +from m5.util.grammar import Grammar + +class GpuIsaParser(Grammar): + def __init__(self, input_file, output_dir): + super(GpuIsaParser, self).__init__() + self.input_file = input_file + self.output_dir = output_dir + + # lexer + + states = ( + ( 'qstring', 'exclusive' ), + ) + + reserved = ( + 'BITS', + 'CONST', + 'DESC', + 'DP_ONLY', + 'DST_0', + 'DST_1', + 'ENC', + 'ENCODING', + 'ENUM', + 'FIELDS', + 'FLAG', + 'FLAGS', + 'FMT', + 'GROUP', + 'IMPORT', + 'INOUT', + 'INST', + 'NAME', + 'NUM_DST', + 'NUM_SRC', + 'OP_TYPE', + 'OPERANDS', + 'PARENT_ENC', + 'PRIVATE', + 'RANGE', + 'REG', + 'SIZE', + 'SIZE_BITS', + 'SP3_DESC', + 'SP3_NAME', + 'SP3_NCOMP', + 'SP3_NUM', + 'SRC_0', + 'SRC_1', + 'SRC_2', + 'SRC_3', + 'SRC_FLAGS', + 'SUB_ENC', + 'TYPE', + 'WHEN' + ) + + # list token names + tokens = reserved + ( + 'ID', + 'STRING', + 'QSTRING', + 'NUMBER', + 'COLON', + 'SEMI', + 'DOLLAR', + 'EQUAL', + 'LBRACE', + 'RBRACE', + 'LPAREN', + 'RPAREN', + 'PLUS', + ) + + # regular expressions for simple tokens + t_COLON = r':' + t_SEMI = r';' + t_DOLLAR = r'\$' + t_EQUAL = r'=' + t_LBRACE = r'\{' + t_RBRACE = r'\}' + t_LPAREN = r'\(' + t_RPAREN = r'\)' + t_PLUS = r'\+' + + reserved_map = { } + for r in reserved: + reserved_map[r.lower()] = r + + def t_QSTRING(self, t): + r'q{' + nesting = 0 + t.lexer.begin('qstring') + while True: + tok = t.lexer.token() + if not tok: + break + t.value += tok.value + if tok.type == 'LBRACE': + nesting += 1 + if tok.type == 'RBRACE': + if nesting > 0: + nesting -= 1 + else: + break + t.lexer.begin('INITIAL') + t.lexer.lineno += t.value.count('\n') + return t + + t_qstring_LBRACE = r'{' + t_qstring_RBRACE = r'}' + t_qstring_STRING = r'[^}{]+' + + t_qstring_ignore = ' \t\x0c' + + def t_qstring_error(self, t): + sys.exit('%d: illegal character "%s"' % (t.lexer.lineno, t.value[0])) + + def t_ID(self, t): + r'[a-zA-Z_][a-zA-Z0-9_]*' + t.type = self.reserved_map.get(t.value, 'ID') + return t + + def 
t_STRING(self, t): + r'(?m)"([^"])+"' + t.value = t.value[1:-1] + t.lexer.lineno += t.value.count('\n') + return t + + def t_NEWLINE(self, t): + r'\n+' + t.lexer.lineno += t.value.count('\n') + + def t_BINARY(self, t): + r'0[bB][0-1]+' + t.value = int(t.value, 2) + t.type = 'NUMBER' + return t + + def t_OCTAL(self, t): + r'0[oO]?[0-7]+' + t.value = int(t.value, 8) + t.type = 'NUMBER' + return t + + def t_HEXADECIMAL(self, t): + r'0[xX][0-9a-fA-F]+' + t.value = int(t.value, 16) + t.type = 'NUMBER' + return t + + def t_DECIMAL(self, t): + r'[0-9][0-9]*' + t.value = int(t.value, 10) + t.type = 'NUMBER' + return t + + def t_COMMENT(self, t): + r'\#[^\n]*\n' + t.lexer.lineno += 1 + + t_ignore = ' \t\x0c' + + def t_error(self, t): + sys.exit('%d: illegal character "%s"' % (t.lexer.lineno, t.value[0])) + + ## parser + + start = 'specification' + + # enum_or_reg rules + + def p_enum_or_reg_0(self, t): + ''' + enum_or_reg : ENUM + ''' + t[0] = t[1] + + def p_enum_or_reg_1(self, t): + ''' + enum_or_reg : REG + ''' + t[0] = t[1] + + # statement(import_statement) rules + def p_import_statement(self, t): + ''' + statement : IMPORT enum_or_reg ID SEMI + ''' + stmnt = ImportStatement() + stmnt.what = t[2] + stmnt.name = t[3] + t[0] = stmnt + + # flags_clause rules + def p_flags_field_0(self, t): + ''' + flags_field : DESC COLON STRING + ''' + info = FlagsField() + info.tag = 'desc' + info.desc = [t[3]] + t[0] = info + + def p_flags_field_1(self, t): + ''' + flags_field : DESC PLUS COLON STRING + ''' + info = FlagsField() + info.tag = 'desc+' + info.desc = [t[4]] + t[0] = info + + def p_flags_field_2(self, t): + ''' + flags_field : PRIVATE COLON NUMBER + ''' + info = FlagsField() + info.tag = 'private' + info.private = t[3] + t[0] = info + + def p_flags_field_3(self, t): + ''' + flags_field : GROUP COLON ID + ''' + info = FlagsField() + info.tag = 'group' + info.group = t[3] + t[0] = info + + # flags_clauses rules + def p_flags_fields_0(self, t): + ''' + flags_fields : flags_field 
+ ''' + t[0] = t[1] + + def p_flags_fields_1(self, t): + ''' + flags_fields : flags_fields flags_field + ''' + t[1].update(t[2]) + t[0] = t[1] + + # flags_clause rules + def p_flags_clause(self, t): + ''' + flags_clause : ID COLON flags_fields SEMI + ''' + field = FlagsField() + field.tag = 'name' + field.name = t[1] + t[3].update(field) + t[0] = t[3] + + # flags_clauses rules + def p_flags_clauses_0(self, t): + ''' + flags_clauses : flags_clause + ''' + t[0] = [t[1]] + + def p_flags_clauses_1(self, t): + ''' + flags_clauses : flags_clauses flags_clause + ''' + t[1].append(t[2]) + t[0] = t[1] + + # flag_clause rules + # FlagBlock: + def p_flag_clause(self, t): + ''' + flag_clause : DESC EQUAL STRING SEMI + ''' + block = FlagBlock() + block.tag = 'desc' + block.desc = [t[3]] + t[0] = block + + # flag_clauses rules + # FlagBlock: + def p_flag_clauses_0(self, t): + ''' + flag_clauses : flag_clause + ''' + t[0] = t[1] + + def p_flag_clauses_1(self, t): + ''' + flag_clauses : flag_clauses flag_clause + ''' + t[1].update(t[2]) + t[0] = t[1] + + # statement(flag_block) rules + # FlagBlock: + def p_flag_block(self, t): + ''' + statement : FLAG ID LBRACE flag_clauses RBRACE + ''' + block = FlagBlock() + block.tag = 'name' + block.name = t[2] + t[4].update(block) + t[0] = t[4] + + # statement(flags_block) rules + # FlagsBlock: + # clauses: [ FlagsFields, ... 
] + def p_flags_block(self, t): + ''' + statement : FLAGS LBRACE flags_clauses RBRACE + ''' + block = FlagsBlock() + block.clauses = t[3] + t[0] = block + + # dst_X rules + def p_dst_0(self, t): + ''' + dst_X : DST_0 + ''' + t[0] = 0 + + def p_dst_1(self, t): + ''' + dst_X : DST_1 + ''' + t[0] = 1 + + # src_X rules + def p_src_0(self, t): + ''' + src_X : SRC_0 + ''' + t[0] = 0 + + def p_src_1(self, t): + ''' + src_X : SRC_1 + ''' + t[0] = 1 + + def p_src_2(self, t): + ''' + src_X : SRC_2 + ''' + t[0] = 2 + + def p_src_3(self, t): + ''' + src_X : SRC_3 + ''' + t[0] = 3 + + # operands_phrase rules + # OpInfo + def p_operands_phrase_0(self, t): + ''' + operands_phrase : dst_X EQUAL ID + ''' + phrase = OpInfo() + phrase.tag = 'dst' + phrase.opr = 'dst' + phrase.index = t[1] + phrase.iseq = t[3] + t[0] = phrase + + def p_operands_phrase_1(self, t): + ''' + operands_phrase : dst_X COLON + ''' + phrase = OpInfo() + phrase.tag = 'dst' + phrase.opr = 'dst' + phrase.index = t[1] + phrase.iseq = None + t[0] = phrase + + def p_operands_phrase_2(self, t): + ''' + operands_phrase : src_X EQUAL ID + ''' + phrase = OpInfo() + phrase.tag = 'src' + phrase.opr = 'src' + phrase.index = t[1] + phrase.iseq = t[3] + t[0] = phrase + + def p_operands_phrase_3(self, t): + ''' + operands_phrase : src_X COLON + ''' + phrase = OpInfo() + phrase.tag = 'src' + phrase.opr = 'src' + phrase.index = t[1] + phrase.iseq = None + t[0] = phrase + + def p_operands_phrase_4(self, t): + ''' + operands_phrase : NAME COLON STRING + ''' + phrase = OpInfo() + phrase.tag = 'name' + phrase.name = t[3] + t[0] = phrase + + def p_operands_phrase_5(self, t): + ''' + operands_phrase : FMT COLON ID + ''' + phrase = OpInfo() + phrase.tag = 'fmt' + phrase.fmt = t[3] + t[0] = phrase + + def p_operands_phrase_6(self, t): + ''' + operands_phrase : SIZE COLON NUMBER + ''' + phrase = OpInfo() + phrase.tag = 'size' + phrase.size = t[3] + t[0] = phrase + + def p_operands_phrase_7(self, t): + ''' + operands_phrase : INOUT COLON 
NUMBER + ''' + phrase = OpInfo() + phrase.tag = 'inout' + phrase.inout = t[3] + t[0] = phrase + + # operands_phrases rules + # OpInfo + def p_operands_phrases_0(self, t): + ''' + operands_phrases : operands_phrase + ''' + t[0] = t[1] + + def p_operands_phrases_1(self, t): + ''' + operands_phrases : operands_phrases operands_phrase + ''' + t[1].update(t[2]) + t[0] = t[1] + + # operands_clause rules + # Operand: + # dst: [ OpInfo, ... ] + # src: [ OpInfo, ... ] + # when: [ WhenBlock, ... ] + # operands: [ Operand, ... ] + def p_operands_clause_0(self, t): + ''' + operands_clause : NUM_DST EQUAL NUMBER SEMI + ''' + clause = Operand() + clause.tag = 'num_dst' + clause.num_dst = t[3] + t[0] = clause + + def p_operands_clause_1(self, t): + ''' + operands_clause : NUM_SRC EQUAL NUMBER SEMI + ''' + clause = Operand() + clause.tag = 'num_src' + clause.num_src = t[3] + t[0] = clause + + def p_operands_clause_2(self, t): + ''' + operands_clause : PARENT_ENC EQUAL ID SEMI + ''' + clause = Operand() + clause.tag = 'parent_enc' + clause.parent_enc = t[3] + t[0] = clause + + def p_operands_clause_3(self, t): + ''' + operands_clause : SUB_ENC EQUAL ID SEMI + ''' + clause = Operand() + clause.tag = 'sub_enc' + clause.sub_enc = t[3] + t[0] = clause + + def p_operands_clause_4(self, t): + ''' + operands_clause : FLAGS EQUAL ID SEMI + ''' + clause = Operand() + clause.tag = 'flags' + clause.flags = [t[3]] + t[0] = clause + + def p_operands_clause_5(self, t): + ''' + operands_clause : FLAGS EQUAL STRING SEMI + ''' + clause = Operand() + clause.tag = 'flags' + # handle flags = 'flag1 flag2' + regexp = re.compile('\w+') + clause.flags = regexp.findall(t[3]) + t[0] = clause + + def p_operands_clause_6(self, t): + ''' + operands_clause : operands_phrases SEMI + ''' + clause = Operand() + clause.tag = t[1].opr + if clause.tag == 'dst': + clause.dst = [t[1]] + elif clause.tag == 'src': + clause.src = [t[1]] + else: + assert (False), 'p_operands_clause_6: unexpected tag ' + clause.tag + t[0] 
= clause + + def p_operands_clause_7(self, t): + ''' + operands_clause : when_block + ''' + clause = Operand() + clause.tag = 'when' + clause.when = [t[1]] + t[0] = clause + + def p_operands_clause_8(self, t): + ''' + operands_clause : operands_block + ''' + clause = Operand() + clause.tag = 'operands' + clause.operands = [t[1]] + t[0] = clause + + # operands_clauses rules + # Operand + def p_operands_clauses_0(self, t): + ''' + operands_clauses : operands_clause + ''' + t[0] = t[1] + + def p_operands_clauses_1(self, t): + ''' + operands_clauses : operands_clauses operands_clause + ''' + t[1].update(t[2]) + t[0] = t[1] + + # when_block rules + # WhenBlock: + # operands: [ Operand, ... ] + def p_when_block_0(self, t): + ''' + when_block : WHEN FLAGS EQUAL ID LBRACE operands_clauses RBRACE + ''' + block = WhenBlock() + block.left = 'flags' + block.right = [t[4]] + block.operand = t[6] + t[0] = block + + def p_when_block_1(self, t): + ''' + when_block : WHEN FLAGS EQUAL STRING LBRACE operands_clauses RBRACE + ''' + block = WhenBlock() + block.left = 'flags' + # handle when flags = 'flag1 flag2' + regexp = re.compile('\w+') + block.right = regexp.findall(t[4]) + block.operand = t[6] + t[0] = block + + def p_when_block_2(self, t): + ''' + when_block : WHEN SUB_ENC EQUAL STRING LBRACE operands_clauses RBRACE + ''' + block = WhenBlock() + block.left = 'sub_enc' + block.right = [t[4]] + block.operand = t[6] + t[0] = block + + # operands_block rules + # Operand + def p_operands_block(self, t): + ''' + operands_block : OPERANDS LBRACE operands_clauses RBRACE + ''' + t[0] = t[3] + + # encoding_clause rules + # EncodingBlock: + # operands: [ Operand, ... 
] + def p_encoding_clause_0(self, t): + ''' + encoding_clause : BITS EQUAL STRING SEMI + ''' + clause = EncodingBlock() + clause.tag = 'bits' + clause.bits = t[3] + t[0] = clause + + def p_encoding_clause_1(self, t): + ''' + encoding_clause : SIZE EQUAL NUMBER SEMI + ''' + clause = EncodingBlock() + clause.tag = 'size' + clause.size = t[3] + t[0] = clause + + def p_encoding_clause_2(self, t): + ''' + encoding_clause : DESC EQUAL STRING SEMI + ''' + clause = EncodingBlock() + clause.tag = 'desc' + clause.desc = [t[3]] + t[0] = clause + + def p_encoding_clause_3(self, t): + ''' + encoding_clause : DESC PLUS EQUAL STRING SEMI + ''' + clause = EncodingBlock() + clause.tag = 'desc+' + clause.desc = [t[4]] + t[0] = clause + + def p_encoding_clause_4(self, t): + ''' + encoding_clause : operands_block + ''' + clause = EncodingBlock() + clause.tag = 'operands' + clause.operands = [t[1]] + t[0] = clause + + # encoding clauses rules + # EncodingBlock: + # operands: [ Operand, ... ] + def p_encoding_clauses_0(self, t): + ''' + encoding_clauses : encoding_clause + ''' + t[0] = t[1] + + def p_encoding_clauses_1(self, t): + ''' + encoding_clauses : encoding_clauses encoding_clause + ''' + t[1].update(t[2]) + t[0] = t[1] + + # encoding_block rules + # EncodingBlock: + # operands: [ Operand, ... 
# encoding_block rules
#   EncodingBlock:
#     operands: [ Operand, ... ]
# The docstrings below are the grammar productions and are kept verbatim.

def p_encoding_block(self, t):
    '''
    statement : ENCODING ID LBRACE encoding_clauses RBRACE
    '''
    # the encoding's name is merged into the accumulated clauses as one
    # more tagged EncodingBlock
    name_part = EncodingBlock()
    name_part.tag, name_part.name = 'name', t[2]
    body = t[4]
    body.update(name_part)
    t[0] = body

# const_clause rule
def p_const_clause(self, t):
    '''
    const_clause : ID EQUAL NUMBER SEMI
    '''
    const = ConstClause()
    const.name, const.value = t[1], t[3]
    t[0] = const

# const_clauses rules
def p_const_clauses_0(self, t):
    '''
    const_clauses : const_clause
    '''
    # start a new list of clauses
    first = t[1]
    t[0] = [first]

def p_const_clauses_1(self, t):
    '''
    const_clauses : const_clauses const_clause
    '''
    # extend the existing list in place and pass it on
    so_far = t[1]
    so_far.append(t[2])
    t[0] = so_far

# statement(const_block) rules
def p_const_block(self, t):
    '''
    statement : CONST LBRACE const_clauses RBRACE
    '''
    const_block = ConstBlock()
    const_block.clauses = t[3]
    t[0] = const_block

# type_phrase rules
# Each action builds a TypeClause whose 'tag' records which phrase form
# was matched.
def p_type_phrase_0(self, t):
    '''
    type_phrase : ID EQUAL NUMBER COLON NUMBER
    '''
    # name = value : v_max
    phrase = TypeClause()
    phrase.tag = 'id_range'
    phrase.name = t[1]
    phrase.value, phrase.v_max = t[3], t[5]
    t[0] = phrase

def p_type_phrase_1(self, t):
    '''
    type_phrase : ID EQUAL NUMBER
    '''
    phrase = TypeClause()
    phrase.tag = 'id_number'
    phrase.name, phrase.value = t[1], t[3]
    t[0] = phrase

def p_type_phrase_2(self, t):
    '''
    type_phrase : ID DOLLAR LBRACE ID RBRACE ID EQUAL NUMBER
    '''
    # the name is re-assembled as <prefix>${<var>}<suffix>
    prefix, var, suffix = t[1], t[4], t[6]
    phrase = TypeClause()
    phrase.tag = 'id_var_number'
    phrase.name = prefix + '${' + var + '}' + suffix
    phrase.value = t[8]
    phrase.var = True
    t[0] = phrase

def p_type_phrase_3(self, t):
    '''
    type_phrase : ID DOLLAR LBRACE ID RBRACE EQUAL NUMBER
    '''
    # same as above but without a trailing suffix: <prefix>${<var>}
    prefix, var = t[1], t[4]
    phrase = TypeClause()
    phrase.tag = 'id_var_number'
    phrase.name = prefix + '${' + var + '}'
    phrase.value = t[7]
    phrase.var = True
    t[0] = phrase

def p_type_phrase_4(self, t):
    '''
    type_phrase : DESC COLON STRING
    '''
    phrase = TypeClause()
    phrase.tag, phrase.desc = 'desc', [t[3]]
    t[0] = phrase

def p_type_phrase_5(self, t):
    '''
    type_phrase : DESC COLON ID LPAREN ID RPAREN
    '''
    # a call-like description is stored as the text "<id>(<id>)"
    call_text = t[3] + '(' + t[5] + ')'
    phrase = TypeClause()
    phrase.tag, phrase.desc = 'desc', [call_text]
    t[0] = phrase
# type_phrase rules (continued): description and flag phrases.
# Each action builds a TypeClause whose 'tag' records which phrase form was
# matched; a trailing '+' in the tag marks the 'PLUS COLON' form.  The
# docstrings are the grammar productions and are kept verbatim.

def p_type_phrase_6(self, t):
    '''
    type_phrase : DESC PLUS COLON ID LPAREN ID RPAREN
    '''
    # a call-like description is stored as the text "<id>(<id>)"
    field = TypeClause()
    field.tag = 'desc+'
    field.desc = [t[4] + '(' + t[6] + ')']
    t[0] = field

def p_type_phrase_7(self, t):
    '''
    type_phrase : DESC PLUS COLON STRING
    '''
    field = TypeClause()
    field.tag = 'desc+'
    field.desc = [t[4]]
    t[0] = field

def p_type_phrase_8a(self, t):
    '''
    type_phrase : DESC COLON QSTRING
    '''
    field = TypeClause()
    field.tag = 'desc'
    # drop the first two and the last character of the token (presumably
    # the QSTRING delimiters -- defined by the lexer, not visible here),
    # trim surrounding whitespace and keep one list entry per line
    field.desc = t[3][2:-1].strip().split('\n')
    t[0] = field

def p_type_phrase_8b(self, t):
    '''
    type_phrase : DESC PLUS COLON QSTRING
    '''
    field = TypeClause()
    field.tag = 'desc+'
    # same unwrapping as the 'desc' QSTRING form above
    field.desc = t[4][2:-1].strip().split('\n')
    t[0] = field

def p_type_phrase_9(self, t):
    '''
    type_phrase : FLAGS COLON ID
    '''
    field = TypeClause()
    field.tag = 'flags'
    field.flags = [t[3]]
    t[0] = field

def p_type_phrase_10(self, t):
    '''
    type_phrase : FLAGS COLON STRING
    '''
    field = TypeClause()
    field.tag = 'flags'
    # handle flags: 'flag1 flag2' -- one list entry per word.  The pattern
    # is a raw string so '\w' is not treated as a string escape.
    field.flags = re.findall(r'\w+', t[3])
    t[0] = field

def p_type_phrase_11(self, t):
    '''
    type_phrase : FLAGS PLUS COLON ID
    '''
    field = TypeClause()
    field.tag = 'flags+'
    field.flags = [t[4]]
    t[0] = field

def p_type_phrase_12(self, t):
    '''
    type_phrase : FLAGS PLUS COLON STRING
    '''
    field = TypeClause()
    field.tag = 'flags+'
    # handle flags+: 'flag1 flag2' -- one list entry per word (raw-string
    # pattern, see p_type_phrase_10)
    field.flags = re.findall(r'\w+', t[4])
    t[0] = field

def p_type_phrase_13(self, t):
    '''
    type_phrase : SRC_FLAGS COLON ID
    '''
    # unlike 'flags', src_flags is stored as the bare token, not a list
    field = TypeClause()
    field.tag = 'src_flags'
    field.src_flags = t[3]
    t[0] = field

def p_type_phrase_14(self, t):
    '''
    type_phrase : SRC_FLAGS COLON STRING
    '''
    # the string form is stored whole; it is not split into words
    field = TypeClause()
    field.tag = 'src_flags'
    field.src_flags = t[3]
    t[0] = field
# type_phrase rules (continued): sp3_*, sub_enc, op_type, dp_only, size,
# fmt, type and range phrases.  Each action builds a TypeClause whose 'tag'
# names the attribute that was filled in.  The docstrings are the grammar
# productions and are kept verbatim.

def p_type_phrase_15(self, t):
    '''
    type_phrase : SP3_DESC COLON STRING
    '''
    field = TypeClause()
    field.tag = 'sp3_desc'
    field.sp3_desc = [t[3]]
    t[0] = field

def p_type_phrase_16(self, t):
    '''
    type_phrase : SP3_DESC PLUS COLON STRING
    '''
    # the 'PLUS COLON' form differs only by its 'sp3_desc+' tag
    field = TypeClause()
    field.tag = 'sp3_desc+'
    field.sp3_desc = [t[4]]
    t[0] = field

def p_type_phrase_17(self, t):
    '''
    type_phrase : SP3_NAME COLON STRING
    '''
    field = TypeClause()
    field.tag = 'sp3_name'
    field.sp3_name = t[3]
    t[0] = field

def p_type_phrase_18(self, t):
    '''
    type_phrase : SP3_NCOMP COLON NUMBER
    '''
    field = TypeClause()
    field.tag = 'sp3_ncomp'
    field.sp3_ncomp = t[3]
    t[0] = field

def p_type_phrase_19(self, t):
    '''
    type_phrase : SP3_NUM COLON STRING
    '''
    field = TypeClause()
    field.tag = 'sp3_num'
    field.sp3_num = t[3]
    t[0] = field

def p_type_phrase_20(self, t):
    '''
    type_phrase : SUB_ENC COLON ID
    '''
    field = TypeClause()
    field.tag = 'sub_enc'
    field.sub_enc = t[3]
    t[0] = field

def p_type_phrase_21(self, t):
    '''
    type_phrase : OP_TYPE COLON ID
    '''
    field = TypeClause()
    field.tag = 'op_type'
    field.op_type = t[3]
    t[0] = field

def p_type_phrase_22(self, t):
    '''
    type_phrase : DP_ONLY COLON NUMBER
    '''
    field = TypeClause()
    field.tag = 'dp_only'
    # Every other phrase stores its value under the attribute named by its
    # tag; this one only wrote 'sub_enc', which looks like a copy/paste
    # slip from p_type_phrase_20.  Store it under 'dp_only' as well.
    field.dp_only = t[3]
    # NOTE(review): the legacy assignment is kept because the consumer of
    # this clause (TypeClause.update) is not visible here and may still
    # read 'sub_enc' for the 'dp_only' tag -- drop it once confirmed.
    field.sub_enc = t[3]
    t[0] = field

def p_type_phrase_23(self, t):
    '''
    type_phrase : SIZE COLON NUMBER
    '''
    field = TypeClause()
    field.tag = 'size'
    field.size = t[3]
    t[0] = field

def p_type_phrase_24(self, t):
    '''
    type_phrase : FMT COLON ID
    '''
    field = TypeClause()
    field.tag = 'fmt'
    field.fmt = t[3]
    t[0] = field

def p_type_phrase_25(self, t):
    '''
    type_phrase : TYPE ID
    '''
    # no COLON in this form, so the value is the second token
    field = TypeClause()
    field.tag = 'type'
    field.type = t[2]
    t[0] = field

def p_type_phrase_26(self, t):
    '''
    type_phrase : RANGE NUMBER COLON NUMBER
    '''
    # the two bounds are kept as a pair, in the order they were written
    field = TypeClause()
    field.tag = 'range'
    field.range = [t[2], t[4]]
    t[0] = field
# Last type_phrase rule, the type_phrases / type_clause(s) accumulators,
# the type block statement, and the inst_field rules.  The docstrings are
# the grammar productions and are kept verbatim.

def p_type_phrase_27(self, t):
    '''
    type_phrase : SIZE_BITS NUMBER
    '''
    phrase = TypeClause()
    phrase.tag, phrase.size_bits = 'size_bits', t[2]
    t[0] = phrase

# type_phrases rules
#   [ TypeClause, ... ]
def p_type_phrases_0(self, t):
    '''
    type_phrases : type_phrase
    '''
    # the first phrase seeds the accumulator
    first = t[1]
    t[0] = first

def p_type_phrases_1(self, t):
    '''
    type_phrases : type_phrases type_phrase
    '''
    # merge each further phrase into the accumulator via update()
    merged = t[1]
    merged.update(t[2])
    t[0] = merged

# type_clause rules
#   [ TypeClause, ... ]
def p_type_clause(self, t):
    '''
    type_clause : type_phrases SEMI
    '''
    # a clause is its phrases; the terminating SEMI carries no value
    phrases = t[1]
    t[0] = phrases

# type_clauses rules
def p_type_clauses_0(self, t):
    '''
    type_clauses : type_clause
    '''
    # start a new list of clauses
    first = t[1]
    t[0] = [first]

def p_type_clauses_1(self, t):
    '''
    type_clauses : type_clauses type_clause
    '''
    # extend the existing list in place and pass it on
    so_far = t[1]
    so_far.append(t[2])
    t[0] = so_far

# statement(type_block) rules
#   TypeBlock:
#     clauses: [ TypeClause, ... ]
def p_type_block(self, t):
    '''
    statement : TYPE ID LBRACE type_clauses RBRACE
    '''
    type_block = TypeBlock()
    type_block.name, type_block.clauses = t[2], t[4]
    t[0] = type_block

# inst_field rules
def p_inst_field_0(self, t):
    '''
    inst_field : ID EQUAL NUMBER COLON NUMBER
    '''
    # name = v_max : value  (first number is v_max, second is value)
    inst_field = InstField()
    inst_field.tag = 'id_range'
    inst_field.name = t[1]
    inst_field.v_max, inst_field.value = t[3], t[5]
    t[0] = inst_field

def p_inst_field_1(self, t):
    '''
    inst_field : ID EQUAL NUMBER
    '''
    # a single number serves as both v_max and value
    inst_field = InstField()
    inst_field.tag = 'id_number'
    inst_field.name = t[1]
    inst_field.v_max = t[3]
    inst_field.value = t[3]
    t[0] = inst_field

def p_inst_field_2(self, t):
    '''
    inst_field : DESC COLON STRING
    '''
    # here desc is the bare string, not a list
    inst_field = InstField()
    inst_field.tag, inst_field.desc = 'desc', t[3]
    t[0] = inst_field

def p_inst_field_3(self, t):
    '''
    inst_field : TYPE COLON ID
    '''
    inst_field = InstField()
    inst_field.tag, inst_field.type = 'type', t[3]
    t[0] = inst_field

def p_inst_field_4(self, t):
    '''
    inst_field : ENC COLON ID
    '''
    inst_field = InstField()
    inst_field.tag, inst_field.enc = 'enc', t[3]
    t[0] = inst_field
# Accumulator rules for instruction fields, the 'fields' block, and the
# inst_clause(s) rules.  The docstrings are the grammar productions and are
# kept verbatim.

# fields_phrases rules
#   [ InstFields, ... ]
def p_fields_phrases_0(self, t):
    '''
    fields_phrases : inst_field
    '''
    # the first field seeds the accumulator
    first = t[1]
    t[0] = first

def p_fields_phrases_1(self, t):
    '''
    fields_phrases : fields_phrases inst_field
    '''
    # merge each further field into the accumulator via update()
    merged = t[1]
    merged.update(t[2])
    t[0] = merged

# inst_fields rules
#   [ InstFields, ... ]
def p_fields_clause(self, t):
    '''
    fields_clause : fields_phrases SEMI
    '''
    # a clause is its phrases; the terminating SEMI carries no value
    phrases = t[1]
    t[0] = phrases

# fields_clauses rules
def p_fields_clauses_0(self, t):
    '''
    fields_clauses : fields_clause
    '''
    # start a new list of clauses
    first = t[1]
    t[0] = [first]

def p_fields_clauses_1(self, t):
    '''
    fields_clauses : fields_clauses fields_clause
    '''
    # extend the existing list in place and pass it on
    so_far = t[1]
    so_far.append(t[2])
    t[0] = so_far

# fields_block rule
#   [ InstFields, ... ]
def p_fields_block(self, t):
    '''
    fields_block : FIELDS LBRACE fields_clauses RBRACE
    '''
    # the block's value is the clause list between the braces
    clauses = t[3]
    t[0] = clauses

# inst_clause rules
#   InstBlock:
#     fields: [ InstFields, ... ]
def p_inst_clause_0(self, t):
    '''
    inst_clause : DESC EQUAL STRING SEMI
    '''
    inst = InstBlock()
    inst.tag, inst.desc = 'desc', t[3]
    t[0] = inst

def p_inst_clause_1(self, t):
    '''
    inst_clause : fields_block
    '''
    inst = InstBlock()
    inst.tag, inst.fields = 'fields', t[1]
    t[0] = inst

# inst_clauses rules
#   InstBlock:
#     fields: [ InstField, ... ]
def p_inst_clauses_0(self, t):
    '''
    inst_clauses : inst_clause
    '''
    # the first clause seeds the accumulator
    first = t[1]
    t[0] = first

def p_inst_clauses_1(self, t):
    '''
    inst_clauses : inst_clauses inst_clause
    '''
    # merge each further clause into the accumulator via update()
    merged = t[1]
    merged.update(t[2])
    t[0] = merged
# statement(inst_block) rules
#   InstBlock:
#     fields: [ InstFields, ... ]
# Followed by the top-level statements/specification rules, the parser
# error hook and the file entry point.  The docstrings of the p_* rules are
# the grammar productions and are kept verbatim.
def p_inst_block(self, t):
    '''
    statement : INST ID LBRACE inst_clauses RBRACE
    '''
    block = InstBlock()
    block.tag = 'name'
    # drop '_0' to simplify matches with encodings.  str.replace() is a
    # no-op when the substring is absent, so no membership test is needed.
    # NOTE(review): this removes every '_0' occurrence, not just a suffix.
    block.name = t[2].replace('_0', '')
    # the name is merged into the accumulated clauses as one more tagged
    # InstBlock
    t[4].update(block)
    t[0] = t[4]

# statements rules
def p_statements_0(self, t):
    '''
    statements : statement
    '''
    # start a new list of statements
    t[0] = [t[1]]

def p_statements_1(self, t):
    '''
    statements : statements statement
    '''
    # extend the existing list in place and pass it on
    t[1].append(t[2])
    t[0] = t[1]

# specification rule
def p_specification(self, t):
    '''
    specification : statements
    '''
    # the whole specification is the list of its statements
    t[0] = t[1]

# error rule
def p_error(self, t):
    # Report a syntax error.  't' is the offending token, or a false value
    # when no token is available.  print() is called with one parenthesised
    # argument so the output is the same on Python 2 and Python 3.
    if t:
        print('%d: syntax error at "%s"' % (t.lexer.lineno, t.value))
    else:
        print('unknown syntax error')
    # NOTE(review): unconditionally drops into the debugger, which blocks a
    # non-interactive run; kept as in the original.
    import pdb; pdb.set_trace()

# end rules
def parse_isa_desc(self):
    '''
    Read in and parse the ISA description

    Reads self.input_file and returns the result of
    self.parse_string(contents, '', debug=False).
    '''
    try:
        # 'with' closes the file even if read() fails
        with open(self.input_file) as isa_file:
            contents = isa_file.read()
    except IOError:
        error('Error with file "%s"' % self.input_file)
        # error() is defined outside this chunk and presumably aborts; if
        # it ever returns, propagate the I/O failure instead of falling
        # through to an unbound 'contents'.
        raise

    return self.parse_string(contents, '', debug=False)
* * For use for simulation and test purposes only @@ -34,8 +34,8 @@ #ifndef __ARCH_GCN3_GPU_MEM_HELPERS_HH__ #define __ARCH_GCN3_GPU_MEM_HELPERS_HH__ -#include "arch/gcn3/insts/gpu_static_inst.hh" -#include "arch/gcn3/insts/op_encodings.hh" +#include "arch/amdgpu/gcn3/insts/gpu_static_inst.hh" +#include "arch/amdgpu/gcn3/insts/op_encodings.hh" #include "debug/GPUMem.hh" #include "gpu-compute/gpu_dyn_inst.hh" diff --git a/src/arch/amdgpu/gcn3/gpu_registers.hh b/src/arch/amdgpu/gcn3/gpu_registers.hh new file mode 100644 index 0000000000..c0afdd9dff --- /dev/null +++ b/src/arch/amdgpu/gcn3/gpu_registers.hh @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. + * All rights reserved. + * + * For use for simulation and test purposes only + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
namespace Gcn3ISA
{
    /**
     * Operand selector values. The enumerators cover the contiguous
     * range 0 - 511: scalar registers and special registers first,
     * then inline constants, and finally the vector registers.
     */
    enum OpSelector : int
    {
        REG_SGPR_MIN = 0,
        REG_SGPR_MAX = 101,
        REG_FLAT_SCRATCH_LO = 102,
        REG_FLAT_SCRATCH_HI = 103,
        REG_XNACK_MASK_LO = 104,
        REG_XNACK_MASK_HI = 105,
        REG_VCC_LO = 106,
        REG_VCC_HI = 107,
        REG_TBA_LO = 108,
        REG_TBA_HI = 109,
        REG_TMA_LO = 110,
        REG_TMA_HI = 111,
        REG_TTMP_0 = 112,
        REG_TTMP_1 = 113,
        REG_TTMP_2 = 114,
        REG_TTMP_3 = 115,
        REG_TTMP_4 = 116,
        REG_TTMP_5 = 117,
        REG_TTMP_6 = 118,
        REG_TTMP_7 = 119,
        REG_TTMP_8 = 120,
        REG_TTMP_9 = 121,
        REG_TTMP_10 = 122,
        REG_TTMP_11 = 123,
        REG_M0 = 124,
        REG_RESERVED_1 = 125,
        REG_EXEC_LO = 126,
        REG_EXEC_HI = 127,
        REG_ZERO = 128,
        REG_INT_CONST_POS_MIN = 129,
        REG_INT_CONST_POS_MAX = 192,
        REG_INT_CONST_NEG_MIN = 193,
        REG_INT_CONST_NEG_MAX = 208,
        REG_RESERVED_2 = 209,
        REG_RESERVED_3 = 210,
        REG_RESERVED_4 = 211,
        REG_RESERVED_5 = 212,
        REG_RESERVED_6 = 213,
        REG_RESERVED_7 = 214,
        REG_RESERVED_8 = 215,
        REG_RESERVED_9 = 216,
        REG_RESERVED_10 = 217,
        REG_RESERVED_11 = 218,
        REG_RESERVED_12 = 219,
        REG_RESERVED_13 = 220,
        REG_RESERVED_14 = 221,
        REG_RESERVED_15 = 222,
        REG_RESERVED_16 = 223,
        REG_RESERVED_17 = 224,
        REG_RESERVED_18 = 225,
        REG_RESERVED_19 = 226,
        REG_RESERVED_20 = 227,
        REG_RESERVED_21 = 228,
        REG_RESERVED_22 = 229,
        REG_RESERVED_23 = 230,
        REG_RESERVED_24 = 231,
        REG_RESERVED_25 = 232,
        REG_RESERVED_26 = 233,
        REG_RESERVED_27 = 234,
        REG_RESERVED_28 = 235,
        REG_RESERVED_29 = 236,
        REG_RESERVED_30 = 237,
        REG_RESERVED_31 = 238,
        REG_RESERVED_32 = 239,
        REG_POS_HALF = 240,
        REG_NEG_HALF = 241,
        REG_POS_ONE = 242,
        REG_NEG_ONE = 243,
        REG_POS_TWO = 244,
        REG_NEG_TWO = 245,
        REG_POS_FOUR = 246,
        REG_NEG_FOUR = 247,
        REG_PI = 248,
        /* NOTE: SDWA and SWDA both refer to sub d-word addressing */
        REG_SRC_SWDA = 249,
        REG_SRC_DPP = 250,
        REG_VCCZ = 251,
        REG_EXECZ = 252,
        REG_SCC = 253,
        REG_LDS_DIRECT = 254,
        REG_SRC_LITERAL = 255,
        REG_VGPR_MIN = 256,
        REG_VGPR_MAX = 511
    };

    constexpr size_t MaxOperandDwords(16);
    // number of elements (lanes) in one vector register
    const int NumVecElemPerVecReg(64);
    // op selector values 129 - 192 correspond to const values 1 - 64
    const int NumPosConstRegs = REG_INT_CONST_POS_MAX
        - REG_INT_CONST_POS_MIN + 1;
    // op selector values 193 - 208 correspond to const values -1 to -16
    const int NumNegConstRegs = REG_INT_CONST_NEG_MAX
        - REG_INT_CONST_NEG_MIN + 1;
    const int BITS_PER_BYTE = 8;
    const int BITS_PER_WORD = 16;
    // index of the most significant bit of a byte / of a 16-bit word
    const int MSB_PER_BYTE = (BITS_PER_BYTE - 1);
    const int MSB_PER_WORD = (BITS_PER_WORD - 1);

    // typedefs for the various sizes/types of scalar regs
    typedef uint8_t ScalarRegU8;
    typedef int8_t ScalarRegI8;
    typedef uint16_t ScalarRegU16;
    typedef int16_t ScalarRegI16;
    typedef uint32_t ScalarRegU32;
    typedef int32_t ScalarRegI32;
    typedef float ScalarRegF32;
    typedef uint64_t ScalarRegU64;
    typedef int64_t ScalarRegI64;
    typedef double ScalarRegF64;

    // typedefs for the various sizes/types of vector reg elements
    typedef uint8_t VecElemU8;
    typedef int8_t VecElemI8;
    typedef uint16_t VecElemU16;
    typedef int16_t VecElemI16;
    typedef uint32_t VecElemU32;
    typedef int32_t VecElemI32;
    typedef float VecElemF32;
    typedef uint64_t VecElemU64;
    typedef int64_t VecElemI64;
    typedef double VecElemF64;

    // size of one DWORD in bytes (the size of a 32-bit vector element)
    const int DWORDSize = sizeof(VecElemU32);
    /**
     * Size of a single-precision register in DWORDs.
     * Evaluates to 1, because DWORDSize is itself sizeof(VecElemU32).
     */
    const int RegSizeDWORDs = sizeof(VecElemU32) / DWORDSize;

    // typedefs for the various sizes/types of vector regs
    // NOTE(review): the right-hand sides below carry no template argument
    // list in this view, and the header's standard #include lines carry
    // no header names; both look like '<...>' text lost in extraction.
    // Confirm against the upstream header before relying on these lines.
    using VecRegU8 = ::VecRegT;
    using VecRegI8 = ::VecRegT;
    using VecRegU16 = ::VecRegT;
    using VecRegI16 = ::VecRegT;
    using VecRegU32 = ::VecRegT;
    using VecRegI32 = ::VecRegT;
    using VecRegF32 = ::VecRegT;
    using VecRegU64 = ::VecRegT;
    using VecRegI64 = ::VecRegT;
    using VecRegF64 = ::VecRegT;
    // non-writeable versions of vector regs
    using ConstVecRegU8 = ::VecRegT;
    using ConstVecRegI8 = ::VecRegT;
    using ConstVecRegU16 = ::VecRegT;
    using ConstVecRegI16 = ::VecRegT;
    using ConstVecRegU32 = ::VecRegT;
    using ConstVecRegI32 = ::VecRegT;
    using ConstVecRegF32 = ::VecRegT;
    using ConstVecRegU64 = ::VecRegT;
    using ConstVecRegI64 = ::VecRegT;
    using ConstVecRegF64 = ::VecRegT;

    // container types of the unsigned vector register aliases
    using VecRegContainerU8 = VecRegU8::Container;
    using VecRegContainerU16 = VecRegU16::Container;
    using VecRegContainerU32 = VecRegU32::Container;
    using VecRegContainerU64 = VecRegU64::Container;

    /**
     * Status register laid out as bit-fields; the declared widths add up
     * to 32 bits. The constructor clears every field.
     */
    struct StatusReg
    {
        StatusReg() : SCC(0), SPI_PRIO(0), USER_PRIO(0), PRIV(0), TRAP_EN(0),
            TTRACE_EN(0), EXPORT_RDY(0), EXECZ(0), VCCZ(0), IN_TG(0),
            IN_BARRIER(0), HALT(0), TRAP(0), TTRACE_CU_EN(0), VALID(0),
            ECC_ERR(0), SKIP_EXPORT(0), PERF_EN(0), COND_DBG_USER(0),
            COND_DBG_SYS(0), ALLOW_REPLAY(0), INSTRUCTION_ATC(0), RESERVED(0),
            MUST_EXPORT(0), RESERVED_1(0)
        {
        }

        uint32_t SCC : 1;
        uint32_t SPI_PRIO : 2;
        uint32_t USER_PRIO : 2;
        uint32_t PRIV : 1;
        uint32_t TRAP_EN : 1;
        uint32_t TTRACE_EN : 1;
        uint32_t EXPORT_RDY : 1;
        uint32_t EXECZ : 1;
        uint32_t VCCZ : 1;
        uint32_t IN_TG : 1;
        uint32_t IN_BARRIER : 1;
        uint32_t HALT : 1;
        uint32_t TRAP : 1;
        uint32_t TTRACE_CU_EN : 1;
        uint32_t VALID : 1;
        uint32_t ECC_ERR : 1;
        uint32_t SKIP_EXPORT : 1;
        uint32_t PERF_EN : 1;
        uint32_t COND_DBG_USER : 1;
        uint32_t COND_DBG_SYS : 1;
        uint32_t ALLOW_REPLAY : 1;
        uint32_t INSTRUCTION_ATC : 1;
        uint32_t RESERVED : 3;
        uint32_t MUST_EXPORT : 1;
        uint32_t RESERVED_1 : 4;
    };

    // Helpers that take an operand selector (presumably an OpSelector
    // value); their definitions are not part of this header.
    std::string opSelectorToRegSym(int opIdx, int numRegs=0);
    int opSelectorToRegIdx(int opIdx, int numScalarRegs);
    bool isPosConstVal(int opIdx);
    bool isNegConstVal(int opIdx);
    bool isConstVal(int opIdx);
    bool isLiteral(int opIdx);
    bool isScalarReg(int opIdx);
    bool isVectorReg(int opIdx);
    bool isFlatScratchReg(int opIdx);
    bool isExecMask(int opIdx);
    bool isVccReg(int opIdx);
} // namespace Gcn3ISA
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
# Hand-written replacements used by the code generator.
# Every string in these tables is emitted text (C++ source lines), so the
# strings must be kept exactly as they are.

# Maps an instruction class name to the list of C++ statements that form
# its hand-coded execute body, one statement (or line) per list entry.
HandCodedExecMethods = {
    'Inst_SOPP__S_NOP' : [
        'ExecNOP(gpuDynInst, instData.SIMM16);'
    ],
    'Inst_VOP1__V_NOP' : [
        'ExecNOP(gpuDynInst, 1);'
    ],
    'Inst_VOP3__V_NOP' : [
        'ExecNOP(gpuDynInst, 1);'
    ],
    'Inst_DS__DS_NOP' : [
        'ExecNOP(gpuDynInst, 1);'
    ],
    'Inst_SOPP__S_ENDPGM' : [
        'ExecEndPgm(gpuDynInst);'
    ],
    'Inst_SOPP__S_ENDPGM_SAVED' : [
        'ExecEndPgmSaved(gpuDynInst);'
    ],
    'Inst_VOP2__V_MUL_HI_I32_I24' : [
        'exec = readSpecialReg(gpuDynInst, REG_EXEC);',
        'vdst = readVectorReg(gpuDynInst, instData.VDST);',
        'src_0 = readSrcReg(gpuDynInst, instData.SRC0);',
        'src_1 = readVectorReg(gpuDynInst, instData.VSRC1);',
        'for (unsigned t = 0; exec != 0; t++, exec >>= 1) {',
        ' if ((exec & 1) != 0) {',
        ' int64_t s0 = (int64_t)src_0[t](23, 0);',
        ' int64_t s1 = (int64_t)src_1[t](23, 0);',
        ' vdst[t] = (int32_t)((s0 * s1) >> 32);',
        ' }',
        '}',
        'writeVectorReg(gpuDynInst, instData.VDST, vdst);'
    ],
    'Inst_VOP2__V_MUL_HI_U32_U24' : [
        'exec = readSpecialReg(gpuDynInst, REG_EXEC);',
        'vdst = readVectorReg(gpuDynInst, instData.VDST);',
        'src_0 = readSrcReg(gpuDynInst, instData.SRC0);',
        'src_1 = readVectorReg(gpuDynInst, instData.VSRC1);',
        'for (unsigned t = 0; exec != 0; t++, exec >>= 1) {',
        ' if ((exec & 1) != 0) {',
        ' uint64_t s0 = (uint64_t)src_0[t](23, 0);',
        ' uint64_t s1 = (uint64_t)src_1[t](23, 0);',
        ' vdst[t] = (uint32_t)((s0 * s1) >> 32);',
        ' }',
        '}',
        'writeVectorReg(gpuDynInst, instData.VDST, vdst);'
    ],

    # stuff below here should eventually get fixed in the parser
    'Inst_SOP1__S_MOVRELS_B64' : [
        'm0 = readSpecialReg(gpuDynInst, REG_M0);',
        'ssrc = readScalarReg(gpuDynInst, instData.SSRC0 + m0);',
        'sdst = ssrc;',
        'writeScalarReg(gpuDynInst, instData.SDST, sdst);'
    ],
    'Inst_SOP1__S_MOVRELD_B64' : [
        'm0 = readSpecialReg(gpuDynInst, REG_M0);',
        'ssrc = readScalarReg(gpuDynInst, instData.SSRC0);',
        'sdst = ssrc;',
        'writeScalarReg(gpuDynInst, instData.SDST + m0, sdst);'
    ],
    'Inst_SOPC__S_SET_GPR_IDX_ON' : [
        'ssrc_0 = readScalarReg(gpuDynInst, instData.SSRC0);',
        'simm4 = instData.SSRC1;',
        'm0(7, 0) = ssrc_0(7, 0);',
        'm0(15, 12) = (uint32_t)simm4;',
        'writeSpecialReg(gpuDynInst, REG_M0, m0);'
    ],
    'Inst_VOP2__V_MADMK_F32' : [
        'exec = readSpecialReg(gpuDynInst, REG_EXEC);',
        'vdst = readVectorReg(gpuDynInst, instData.VDST);',
        'src_0 = readSrcReg(gpuDynInst, instData.SRC0);',
        'k = extData.imm_f32;',
        'src_2 = readVectorReg(gpuDynInst, instData.VSRC1);',
        'for (unsigned t = 0; exec != 0; t++, exec >>= 1) {',
        ' if ((exec & 1) != 0) {',
        ' vdst[t] = src_0[t] * k + src_2[t];',
        ' }',
        '}',
        'writeVectorReg(gpuDynInst, instData.VDST, vdst);'
    ],
    'Inst_VOP2__V_MADAK_F32' : [
        'exec = readSpecialReg(gpuDynInst, REG_EXEC);',
        'vdst = readVectorReg(gpuDynInst, instData.VDST);',
        'src_0 = readSrcReg(gpuDynInst, instData.SRC0);',
        'src_1 = readVectorReg(gpuDynInst, instData.VSRC1);',
        'k = extData.imm_f32;',
        'for (unsigned t = 0; exec != 0; t++, exec >>= 1) {',
        ' if ((exec & 1) != 0) {',
        ' vdst[t] = src_0[t] * src_1[t] + k;',
        ' }',
        '}',
        'writeVectorReg(gpuDynInst, instData.VDST, vdst);'
    ],
    # NOTE(review): the two F16 entries below read extData.imm_f32, the
    # same field as the F32 entries above -- confirm this is intended.
    'Inst_VOP2__V_MADMK_F16' : [
        'exec = readSpecialReg(gpuDynInst, REG_EXEC);',
        'vdst = readVectorReg(gpuDynInst, instData.VDST);',
        'src_0 = readSrcReg(gpuDynInst, instData.SRC0);',
        'k = extData.imm_f32;',
        'src_2 = readVectorReg(gpuDynInst, instData.VSRC1);',
        'for (unsigned t = 0; exec != 0; t++, exec >>= 1) {',
        ' if ((exec & 1) != 0) {',
        ' vdst[t] = src_0[t] * k + src_2[t];',
        ' }',
        '}',
        'writeVectorReg(gpuDynInst, instData.VDST, vdst);'
    ],
    'Inst_VOP2__V_MADAK_F16' : [
        'exec = readSpecialReg(gpuDynInst, REG_EXEC);',
        'vdst = readVectorReg(gpuDynInst, instData.VDST);',
        'src_0 = readSrcReg(gpuDynInst, instData.SRC0);',
        'src_1 = readVectorReg(gpuDynInst, instData.VSRC1);',
        'k = extData.imm_f32;',
        'for (unsigned t = 0; exec != 0; t++, exec >>= 1) {',
        ' if ((exec & 1) != 0) {',
        ' vdst[t] = src_0[t] * src_1[t] + k;',
        ' }',
        '}',
        'writeVectorReg(gpuDynInst, instData.VDST, vdst);'
    ],
    'Inst_VOP3__V_MUL_HI_I32_I24' : [
        'exec = readSpecialReg(gpuDynInst, REG_EXEC);',
        'vdst = readVectorReg(gpuDynInst, instData.VDST);',
        'src_0 = readSrcReg(gpuDynInst, extData.SRC0);',
        'src_1 = readSrcReg(gpuDynInst, extData.SRC1);',
        'for (unsigned t = 0; exec != 0; t++, exec >>= 1) {',
        ' if ((exec & 1) != 0) {',
        ' int64_t s0 = (int64_t)(int32_t)src_0[t](23, 0);',
        ' int64_t s1 = (int64_t)(int32_t)src_1[t](23, 0);',
        ' vdst[t] = (int32_t)((s0 * s1) >> 32);',
        ' }',
        '}',
        'writeVectorReg(gpuDynInst, instData.VDST, vdst);'
    ],
    'Inst_VOP3__V_MUL_HI_U32_U24' : [
        'exec = readSpecialReg(gpuDynInst, REG_EXEC);',
        'vdst = readVectorReg(gpuDynInst, instData.VDST);',
        'src_0 = readSrcReg(gpuDynInst, extData.SRC0);',
        'src_1 = readSrcReg(gpuDynInst, extData.SRC1);',
        'for (unsigned t = 0; exec != 0; t++, exec >>= 1) {',
        ' if ((exec & 1) != 0) {',
        ' uint64_t s0 = (uint64_t)(uint32_t)src_0[t](23, 0);',
        ' uint64_t s1 = (uint64_t)(uint32_t)src_1[t](23, 0);',
        ' vdst[t] = (int32_t)((s0 * s1) >> 32);',
        ' }',
        '}',
        'writeVectorReg(gpuDynInst, instData.VDST, vdst);'
    ],
    'Inst_VOP3__V_MAD_U64_U32' : [
        'exec = readSpecialReg(gpuDynInst, REG_EXEC);',
        'vdst = readVectorReg(gpuDynInst, instData.VDST);',
        'src_0 = readSrcReg(gpuDynInst, extData.SRC0);',
        'src_1 = readSrcReg(gpuDynInst, extData.SRC1);',
        'src_2 = readSrcReg(gpuDynInst, extData.SRC2);',
        'for (unsigned t = 0; exec != 0; t++, exec >>= 1) {',
        ' if ((exec & 1) != 0) {',
        ' vcc(t) = muladd(vdst[t], src_0[t], src_1[t], src_2[t]);',
        ' }',
        '}',
        'writeSpecialReg(gpuDynInst, REG_VCC, vcc);',
        'writeVectorReg(gpuDynInst, instData.VDST, vdst);'
    ],
    'Inst_VOP3__V_MAD_I64_I32' : [
        'exec = readSpecialReg(gpuDynInst, REG_EXEC);',
        'vdst = readVectorReg(gpuDynInst, instData.VDST);',
        'src_0 = readSrcReg(gpuDynInst, extData.SRC0);',
        'src_1 = readSrcReg(gpuDynInst, extData.SRC1);',
        'src_2 = readSrcReg(gpuDynInst, extData.SRC2);',
        'for (unsigned t = 0; exec != 0; t++, exec >>= 1) {',
        ' if ((exec & 1) != 0) {',
        ' vcc(t) = muladd(vdst[t], src_0[t], src_1[t], src_2[t]);',
        ' }',
        '}',
        'writeSpecialReg(gpuDynInst, REG_VCC, vcc);',
        'writeVectorReg(gpuDynInst, instData.VDST, vdst);'
    ],
    # NOTE(review): in the two DS_WRITE entries below calculateAddr() is
    # called with distinct offsets, while every writeMem() call passes 0
    # -- confirm this is intended.
    'Inst_DS__DS_WRITE_B96' : [
        'exec = readSpecialReg(gpuDynInst, REG_EXEC);',
        'vgpr_a = readVectorReg(gpuDynInst, extData.ADDR);',
        'calculateAddr(gpuDynInst, vgpr_a, 8, 0);',
        'calculateAddr(gpuDynInst, vgpr_a, 4, 0);',
        'calculateAddr(gpuDynInst, vgpr_a, 0, 0);',
        'vgpr_d0 = readVectorReg(gpuDynInst, extData.DATA0);',
        'for (unsigned t = 0; exec != 0; t++, exec >>= 1) {',
        ' if ((exec & 1) != 0) {',
        ' vmem_0[t] = vgpr_d0[t].getDword(2);',
        ' vmem_1[t] = vgpr_d0[t].getDword(1);',
        ' vmem_2[t] = vgpr_d0[t].getDword(0);',
        ' }',
        '}',
        'writeMem(gpuDynInst, vgpr_a, 0, vmem_0);',
        'writeMem(gpuDynInst, vgpr_a, 0, vmem_1);',
        'writeMem(gpuDynInst, vgpr_a, 0, vmem_2);'
    ],
    'Inst_DS__DS_WRITE_B128' : [
        'exec = readSpecialReg(gpuDynInst, REG_EXEC);',
        'vgpr_a = readVectorReg(gpuDynInst, extData.ADDR);',
        'calculateAddr(gpuDynInst, vgpr_a, 12, 0);',
        'calculateAddr(gpuDynInst, vgpr_a, 8, 0);',
        'calculateAddr(gpuDynInst, vgpr_a, 4, 0);',
        'calculateAddr(gpuDynInst, vgpr_a, 0, 0);',
        'vgpr_d0 = readVectorReg(gpuDynInst, extData.DATA0);',
        'for (unsigned t = 0; exec != 0; t++, exec >>= 1) {',
        ' if ((exec & 1) != 0) {',
        ' vmem_0[t] = vgpr_d0[t].getDword(3);',
        ' vmem_1[t] = vgpr_d0[t].getDword(2);',
        ' vmem_2[t] = vgpr_d0[t].getDword(1);',
        ' vmem_3[t] = vgpr_d0[t].getDword(0);',
        ' }',
        '}',
        'writeMem(gpuDynInst, vgpr_a, 0, vmem_0);',
        'writeMem(gpuDynInst, vgpr_a, 0, vmem_1);',
        'writeMem(gpuDynInst, vgpr_a, 0, vmem_2);',
        'writeMem(gpuDynInst, vgpr_a, 0, vmem_3);'
    ],
    'Inst_SOPP__S_WAITCNT' : [
        'int vm_cnt = 0;',
        'int lgkm_cnt = 0;',
        'vm_cnt = bits(instData.SIMM16, 3, 0);',
        'lgkm_cnt = bits(instData.SIMM16, 11, 8);',
        'gpuDynInst->wavefront()->setWaitCnts(vm_cnt, lgkm_cnt);'
    ]
}

# Maps an instruction class name to the C++ variable declarations that go
# with its hand-coded execute body above.  Not every key of
# HandCodedExecMethods has an entry here (e.g. the NOP/ENDPGM/WAITCNT
# bodies use no such variables).
HandCodedDecl = {
    'Inst_VOP2__V_MUL_HI_I32_I24' : [
        'SregU64 exec;',
        'VregI32 vdst;',
        'VregI32 src_0;',
        'VregI32 src_1;'
    ],
    'Inst_VOP2__V_MUL_HI_U32_U24' : [
        'SregU64 exec;',
        'VregI32 vdst;',
        'VregU32 src_0;',
        'VregU32 src_1;'
    ],
    'Inst_VOP3__V_MUL_HI_I32_I24' : [
        'SregU64 exec;',
        'VregI32 vdst;',
        'VregI32 src_0;',
        'VregI32 src_1;'
    ],
    'Inst_VOP3__V_MUL_HI_U32_U24' : [
        'SregU64 exec;',
        'VregI32 vdst;',
        'VregU32 src_0;',
        'VregU32 src_1;'
    ],
    'Inst_SOPC__S_SET_GPR_IDX_ON' : [
        'SregU32 m0;',
        'SregU32 ssrc_0;',
        'SregU16 simm4;'
    ],
    'Inst_SOP1__S_MOVRELS_B64' : [
        'SregU64 sdst;',
        'SregU32 m0;',
        'SregU64 ssrc;'
    ],
    'Inst_SOP1__S_MOVRELD_B64' : [
        'SregU64 sdst;',
        'SregU32 m0;',
        'SregU64 ssrc;'
    ],
    'Inst_VOP2__V_MADMK_F32' : [
        'SregU64 exec;',
        'VregF32 vdst;',
        'VregF32 src_0;',
        'SregF32 k;',
        'VregF32 src_2;'
    ],
    'Inst_VOP2__V_MADAK_F32' : [
        'SregU64 exec;',
        'VregF32 vdst;',
        'VregF32 src_0;',
        'VregF32 src_1;',
        'SregF32 k;'
    ],
    'Inst_VOP2__V_MADMK_F16' : [
        'SregU64 exec;',
        'VregF16 vdst;',
        'VregF16 src_0;',
        'SregF16 k;',
        'VregF16 src_2;'
    ],
    'Inst_VOP2__V_MADAK_F16' : [
        'SregU64 exec;',
        'VregF16 vdst;',
        'VregF16 src_0;',
        'VregF16 src_1;',
        'SregF16 k;'
    ],
    'Inst_VOP3__V_MAD_U64_U32' : [
        'SregU64 exec;',
        'SregU64 vcc;',
        'VregU64 vdst;',
        'VregU32 src_0;',
        'VregU32 src_1;',
        'VregU64 src_2;'
    ],
    'Inst_VOP3__V_MAD_I64_I32' : [
        'SregU64 exec;',
        'SregU64 vcc;',
        'VregI64 vdst;',
        'VregI32 src_0;',
        'VregI32 src_1;',
        'VregI64 src_2;'
    ],
    'Inst_DS__DS_WRITE_B96' : [
        'SregU64 exec;',
        'VregU32 vmem_0;',
        'VregU32 vgpr_a;',
        'VregU32 vmem_1;',
        'VregU32 vmem_2;',
        'VregU96 vgpr_d0;'
    ],
    'Inst_DS__DS_WRITE_B128' : [
        'SregU64 exec;',
        'VregU32 vmem_0;',
        'VregU32 vgpr_a;',
        'VregU32 vmem_1;',
        'VregU32 vmem_2;',
        'VregU32 vmem_3;',
        'VregU128 vgpr_d0;'
    ]
}

# Prototypes of the hand-coded helper functions.  Each entry appears to be
# [return type, function name, argument type, ...] -- confirm against the
# code that consumes this table.
HandCodedPrototypes = [
    ['void', 'ExecNOP', 'GPUDynInstPtr', 'uint32_t'],
    ['void', 'ExecEndPgm', 'GPUDynInstPtr'],
    ['void', 'ExecEndPgmSaved', 'GPUDynInstPtr'],
    ['VregI64&', 'getSRC_SIMPLE_I64', 'GPUDynInstPtr', 'uint32_t']
]

# Hand-coded methods.  Each entry appears to be
# [return type, class name, method name, <list>, <list>, body lines]; the
# two lists are empty in every entry here (presumably argument types and
# names -- TODO confirm against the consumer).
HandCodedStaticInstMethods = [
    ['SregI32&', 'ViGPUStaticInst', 'getSSrcLiteral_I32', [], [],
        [
            'static SregI32 sreg;',
            'sreg = 0;',
            'return sreg;'
        ]
    ],
    [ 'SregU32 &', 'ViGPUStaticInst', 'getSSrcLiteral_U32', [], [],
        [
            'static SregU32 sreg;',
            'sreg = 0;',
            'return sreg;'
        ]
    ],
    [ 'VregI32 &', 'ViGPUStaticInst', 'getVSrcLiteral_I32', [], [],
        [
            'static VregI32 vreg;',
            'vreg = getSSrcLiteral_I32();',
            'return vreg;'
        ]
    ],
    [ 'VregU32 &', 'ViGPUStaticInst', 'getVSrcLiteral_U32', [], [],
        [
            'static VregU32 vreg;',
            'vreg = getSSrcLiteral_U32();',
            'return vreg;'
        ]
    ]
]

# Maps a getter name to C++ lines that return the matching literal getter
# from HandCodedStaticInstMethods when 'arg2' equals REG_SRC_LITERAL.
# Presumably emitted at the start of the named getter -- confirm against
# the consumer.
HandCodedInstProlog = {
    'getSSRC_I32' : [
        'if (arg2 == REG_SRC_LITERAL)',
        ' return getSSrcLiteral_I32();'
    ],
    'getSSRC_U32' : [
        'if (arg2 == REG_SRC_LITERAL)',
        ' return getSSrcLiteral_U32();'
    ],
    'getSRC_I32' : [
        'if (arg2 == REG_SRC_LITERAL)',
        ' return getVSrcLiteral_I32();'
    ],
    'getSRC_U32' : [
        'if (arg2 == REG_SRC_LITERAL)',
        ' return getVSrcLiteral_U32();'
    ]
}
*/ -#include "arch/gcn3/insts/gpu_static_inst.hh" +#include "arch/amdgpu/gcn3/insts/gpu_static_inst.hh" -#include "arch/gcn3/gpu_decoder.hh" -#include "arch/gcn3/insts/instructions.hh" +#include "arch/amdgpu/gcn3/gpu_decoder.hh" +#include "arch/amdgpu/gcn3/insts/instructions.hh" #include "debug/GPUExec.hh" #include "gpu-compute/shader.hh" diff --git a/src/arch/gcn3/insts/gpu_static_inst.hh b/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh similarity index 96% rename from src/arch/gcn3/insts/gpu_static_inst.hh rename to src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh index bec487dcd6..03beb20b94 100644 --- a/src/arch/gcn3/insts/gpu_static_inst.hh +++ b/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017 Advanced Micro Devices, Inc. + * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. * All rights reserved. * * For use for simulation and test purposes only @@ -34,8 +34,8 @@ #ifndef __ARCH_GCN3_INSTS_GPU_STATIC_INST_HH__ #define __ARCH_GCN3_INSTS_GPU_STATIC_INST_HH__ -#include "arch/gcn3/operand.hh" -#include "arch/gcn3/registers.hh" +#include "arch/amdgpu/gcn3/gpu_registers.hh" +#include "arch/amdgpu/gcn3/operand.hh" #include "gpu-compute/gpu_static_inst.hh" #include "gpu-compute/scalar_register_file.hh" #include "gpu-compute/vector_register_file.hh" diff --git a/src/arch/gcn3/insts/inst_util.hh b/src/arch/amdgpu/gcn3/insts/inst_util.hh similarity index 99% rename from src/arch/gcn3/insts/inst_util.hh rename to src/arch/amdgpu/gcn3/insts/inst_util.hh index 204661eb8f..9f73592e5b 100644 --- a/src/arch/gcn3/insts/inst_util.hh +++ b/src/arch/amdgpu/gcn3/insts/inst_util.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017 Advanced Micro Devices, Inc. + * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. * All rights reserved. 
* * For use for simulation and test purposes only @@ -36,7 +36,7 @@ #include -#include "arch/gcn3/registers.hh" +#include "arch/amdgpu/gcn3/gpu_registers.hh" // values for SDWA select operations enum SDWASelVals : int diff --git a/src/arch/gcn3/insts/instructions.cc b/src/arch/amdgpu/gcn3/insts/instructions.cc similarity index 99% rename from src/arch/gcn3/insts/instructions.cc rename to src/arch/amdgpu/gcn3/insts/instructions.cc index bde87efeea..ad04a4a7a8 100644 --- a/src/arch/gcn3/insts/instructions.cc +++ b/src/arch/amdgpu/gcn3/insts/instructions.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017 Advanced Micro Devices, Inc. + * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. * All rights reserved. * * For use for simulation and test purposes only @@ -31,11 +31,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "arch/gcn3/insts/instructions.hh" +#include "arch/amdgpu/gcn3/insts/instructions.hh" #include -#include "arch/gcn3/insts/inst_util.hh" +#include "arch/amdgpu/gcn3/insts/inst_util.hh" #include "debug/GCN3.hh" #include "debug/GPUSync.hh" #include "gpu-compute/shader.hh" diff --git a/src/arch/gcn3/insts/instructions.hh b/src/arch/amdgpu/gcn3/insts/instructions.hh similarity index 99% rename from src/arch/gcn3/insts/instructions.hh rename to src/arch/amdgpu/gcn3/insts/instructions.hh index f81c811640..5e00e731a9 100644 --- a/src/arch/gcn3/insts/instructions.hh +++ b/src/arch/amdgpu/gcn3/insts/instructions.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017 Advanced Micro Devices, Inc. + * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. * All rights reserved. 
* * For use for simulation and test purposes only @@ -34,9 +34,9 @@ #ifndef __ARCH_GCN3_INSTS_INSTRUCTIONS_HH__ #define __ARCH_GCN3_INSTS_INSTRUCTIONS_HH__ -#include "arch/gcn3/gpu_decoder.hh" -#include "arch/gcn3/insts/gpu_static_inst.hh" -#include "arch/gcn3/insts/op_encodings.hh" +#include "arch/amdgpu/gcn3/gpu_decoder.hh" +#include "arch/amdgpu/gcn3/insts/gpu_static_inst.hh" +#include "arch/amdgpu/gcn3/insts/op_encodings.hh" #include "debug/GCN3.hh" namespace Gcn3ISA diff --git a/src/arch/gcn3/insts/op_encodings.cc b/src/arch/amdgpu/gcn3/insts/op_encodings.cc similarity index 99% rename from src/arch/gcn3/insts/op_encodings.cc rename to src/arch/amdgpu/gcn3/insts/op_encodings.cc index a2cea4d727..a6a3a26fdf 100644 --- a/src/arch/gcn3/insts/op_encodings.cc +++ b/src/arch/amdgpu/gcn3/insts/op_encodings.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017 Advanced Micro Devices, Inc. + * Copyright (c) 2016-2021 Advanced Micro Devices, Inc. * All rights reserved. * * For use for simulation and test purposes only @@ -31,7 +31,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "arch/gcn3/insts/op_encodings.hh" +#include "arch/amdgpu/gcn3/insts/op_encodings.hh" #include diff --git a/src/arch/gcn3/insts/op_encodings.hh b/src/arch/amdgpu/gcn3/insts/op_encodings.hh similarity index 99% rename from src/arch/gcn3/insts/op_encodings.hh rename to src/arch/amdgpu/gcn3/insts/op_encodings.hh index f8a34619a9..0957a7dda9 100644 --- a/src/arch/gcn3/insts/op_encodings.hh +++ b/src/arch/amdgpu/gcn3/insts/op_encodings.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017 Advanced Micro Devices, Inc. + * Copyright (c) 2016-2021 Advanced Micro Devices, Inc. * All rights reserved. 
* * For use for simulation and test purposes only @@ -34,10 +34,10 @@ #ifndef __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__ #define __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__ -#include "arch/gcn3/gpu_decoder.hh" -#include "arch/gcn3/gpu_mem_helpers.hh" -#include "arch/gcn3/insts/gpu_static_inst.hh" -#include "arch/gcn3/operand.hh" +#include "arch/amdgpu/gcn3/gpu_decoder.hh" +#include "arch/amdgpu/gcn3/gpu_mem_helpers.hh" +#include "arch/amdgpu/gcn3/insts/gpu_static_inst.hh" +#include "arch/amdgpu/gcn3/operand.hh" #include "debug/GCN3.hh" #include "debug/GPUExec.hh" #include "mem/ruby/system/RubySystem.hh" diff --git a/src/arch/gcn3/isa.cc b/src/arch/amdgpu/gcn3/isa.cc similarity index 97% rename from src/arch/gcn3/isa.cc rename to src/arch/amdgpu/gcn3/isa.cc index ca3a8d8384..560985d7a9 100644 --- a/src/arch/gcn3/isa.cc +++ b/src/arch/amdgpu/gcn3/isa.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 Advanced Micro Devices, Inc. + * Copyright (c) 2016-2021 Advanced Micro Devices, Inc. * All rights reserved. * * For use for simulation and test purposes only @@ -31,7 +31,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "arch/gcn3/gpu_isa.hh" +#include "arch/amdgpu/gcn3/gpu_isa.hh" #include diff --git a/src/arch/gcn3/operand.hh b/src/arch/amdgpu/gcn3/operand.hh similarity index 99% rename from src/arch/gcn3/operand.hh rename to src/arch/amdgpu/gcn3/operand.hh index 39d3d13d4f..5397b8eff8 100644 --- a/src/arch/gcn3/operand.hh +++ b/src/arch/amdgpu/gcn3/operand.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Advanced Micro Devices, Inc. + * Copyright (c) 2017-2021 Advanced Micro Devices, Inc. * All rights reserved. 
* * For use for simulation and test purposes only @@ -36,7 +36,7 @@ #include -#include "arch/gcn3/registers.hh" +#include "arch/amdgpu/gcn3/gpu_registers.hh" #include "arch/generic/vec_reg.hh" #include "gpu-compute/scalar_register_file.hh" #include "gpu-compute/vector_register_file.hh" diff --git a/src/arch/gcn3/registers.cc b/src/arch/amdgpu/gcn3/registers.cc similarity index 98% rename from src/arch/gcn3/registers.cc rename to src/arch/amdgpu/gcn3/registers.cc index 81f48eca7a..182f6775a5 100644 --- a/src/arch/gcn3/registers.cc +++ b/src/arch/amdgpu/gcn3/registers.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017 Advanced Micro Devices, Inc. + * Copyright (c) 2015-2021 Advanced Micro Devices, Inc. * All rights reserved. * * For use for simulation and test purposes only @@ -31,7 +31,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "arch/gcn3/registers.hh" +#include "arch/amdgpu/gcn3/gpu_registers.hh" namespace Gcn3ISA {