arch-power: Refactor arithmetic instructions

This changes the base classes for integer arithmetic
instructions and introduces two new classes that are used
to distinguish between instructions using register and
immediate operands.

Decoding has also been consolidated using formats that can
generate code after determining whether an instruction records
carry and overflow, and whether it records the nature of the
result, i.e. less than, greater than, or equal to zero.
However, for multiply and divide instructions, the code to
determine if an overflow has occurred has been moved to the
instruction definition itself. The formats have also been
updated to make use of the new base classes.

Change-Id: I23d70ac4bad4d25d876308db0b3564c092bf574c
Signed-off-by: Sandipan Das <sandipan@linux.ibm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/40898
Reviewed-by: Boris Shingarov <shingarov@labware.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Sandipan Das
2021-02-06 17:17:28 +05:30
committed by Boris Shingarov
parent 46d6baed87
commit 3e33206836
7 changed files with 176 additions and 173 deletions

View File

@@ -43,11 +43,12 @@ class FloatOp : public PowerStaticInst
{
protected:
bool rcSet;
bool rc;
/// Constructor
FloatOp(const char *mnem, MachInst _machInst, OpClass __opClass)
: PowerStaticInst(mnem, _machInst, __opClass)
: PowerStaticInst(mnem, _machInst, __opClass),
rc(machInst.rc)
{
}

View File

@@ -61,8 +61,10 @@ IntOp::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const
}
// Additional characters depending on isa bits being set
if (oeSet) myMnemonic = myMnemonic + "o";
if (rcSet) myMnemonic = myMnemonic + ".";
if (oe)
myMnemonic = myMnemonic + "o";
if (rc)
myMnemonic = myMnemonic + ".";
ccprintf(ss, "%-10s ", myMnemonic);
// Print the first destination only
@@ -116,7 +118,7 @@ IntImmOp::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const
}
// Print the immediate value last
ss << ", " << (int32_t)imm;
ss << ", " << (int32_t)si;
return ss.str();
}

View File

@@ -51,8 +51,8 @@ class IntOp : public PowerStaticInst
{
protected:
bool rcSet;
bool oeSet;
bool rc;
bool oe;
// Needed for srawi only
uint32_t sh;
@@ -60,7 +60,8 @@ class IntOp : public PowerStaticInst
/// Constructor
IntOp(const char *mnem, MachInst _machInst, OpClass __opClass)
: PowerStaticInst(mnem, _machInst, __opClass),
rcSet(false), oeSet(false)
rc(machInst.rc),
oe(machInst.oe)
{
}
@@ -104,14 +105,14 @@ class IntImmOp : public IntOp
{
protected:
int32_t imm;
uint32_t uimm;
int32_t si;
uint32_t ui;
/// Constructor
IntImmOp(const char *mnem, MachInst _machInst, OpClass __opClass)
: IntOp(mnem, _machInst, __opClass),
imm(sext<16>(machInst.si)),
uimm(machInst.si)
si(sext<16>(machInst.si)),
ui(machInst.si)
{
}
@@ -120,6 +121,39 @@ class IntImmOp : public IntOp
};
/**
* Class for integer arithmetic operations.
*
* Adds no data members of its own; it exists as a distinct base type
* layered on top of IntOp. The ISA formats in this change pass
* 'IntArithOp' as the base class name when generating register-operand
* arithmetic instructions.
*/
class IntArithOp : public IntOp
{
protected:
/// Constructor: forwards the mnemonic, machine instruction and
/// operation class to IntOp unchanged.
IntArithOp(const char *mnem, MachInst _machInst, OpClass __opClass)
: IntOp(mnem, _machInst, __opClass)
{
}
};
/**
* Class for integer immediate arithmetic operations.
*
* Extends IntArithOp with the immediate operand taken from the
* instruction word, so instruction code can refer to it as 'si'.
*/
class IntImmArithOp : public IntArithOp
{
protected:
// Immediate operand: the 16-bit SI field of the instruction passed
// through sext<16> and stored as a 32-bit signed value.
int32_t si;
/// Constructor: forwards to IntArithOp and captures the immediate
/// from machInst.si.
IntImmArithOp(const char *mnem, MachInst _machInst, OpClass __opClass)
: IntArithOp(mnem, _machInst, __opClass),
si(sext<16>(machInst.si))
{
}
};
/**
* Class for integer operations with a shift.
*/

View File

@@ -39,42 +39,48 @@ decode PO default Unknown::unknown() {
format IntImmArithOp {
7: mulli({{
int32_t src = Ra_sw;
int64_t prod = src * imm;
int64_t prod = src * si;
Rt = (uint32_t)prod;
}});
8: subfic({{ int32_t src = ~Ra; Rt = src + imm + 1; }},
[computeCA]);
8: subfic({{
uint64_t src = ~Ra;
Rt = src + si + 1;
}}, true);
}
format IntImmOp {
10: cmpli({{
Xer xer = XER;
uint32_t cr = makeCRFieldUnsigned(Ra_uw, uimm, xer.so);
uint32_t cr = makeCRFieldUnsigned(Ra_uw, ui, xer.so);
CR = insertCRField(CR, BF, cr);
}});
11: cmpi({{
Xer xer = XER;
uint32_t cr = makeCRFieldSigned(Ra_sw, imm, xer.so);
uint32_t cr = makeCRFieldSigned(Ra_sw, si, xer.so);
CR = insertCRField(CR, BF, cr);
}});
}
format IntImmArithOp {
12: addic({{ uint32_t src = Ra; Rt = src + imm; }},
[computeCA]);
12: addic({{
uint64_t src = Ra;
Rt = src + si;
}}, true);
13: addic_({{ uint32_t src = Ra; Rt = src + imm; }},
[computeCA, computeCR0]);
13: addic_({{
uint64_t src = Ra;
Rt = src + si;
}}, true, true);
}
format IntImmArithCheckRaOp {
14: addi({{ Rt = Ra + imm; }},
{{ Rt = imm }});
14: addi({{ Rt = Ra + si; }},
{{ Rt = si }});
15: addis({{ Rt = Ra + (imm << 16); }},
{{ Rt = imm << 16; }});
15: addis({{ Rt = Ra + (si << 16); }},
{{ Rt = si << 16; }});
}
// Conditionally branch to a PC-relative or absolute address based
@@ -168,12 +174,12 @@ decode PO default Unknown::unknown() {
}
format IntImmLogicOp {
24: ori({{ Ra = Rs | uimm; }});
25: oris({{ Ra = Rs | (uimm << 16); }});
26: xori({{ Ra = Rs ^ uimm; }});
27: xoris({{ Ra = Rs ^ (uimm << 16); }});
28: andi_({{ Ra = Rs & uimm; }}, true);
29: andis_({{ Ra = Rs & (uimm << 16); }}, true);
24: ori({{ Ra = Rs | ui; }});
25: oris({{ Ra = Rs | (ui << 16); }});
26: xori({{ Ra = Rs ^ ui; }});
27: xoris({{ Ra = Rs ^ (ui << 16); }});
28: andi_({{ Ra = Rs & ui; }}, true);
29: andis_({{ Ra = Rs & (ui << 16); }}, true);
}
// There are a large number of instructions that have the same primary
@@ -479,14 +485,14 @@ decode PO default Unknown::unknown() {
10: addc({{ Ra }}, {{ Rb }}, computeCA = true);
}
11: IntArithOp::mulhwu({{
11: IntArithCheckRcOp::mulhwu({{
uint64_t prod = Ra_ud * Rb_ud;
Rt = prod >> 32;
}});
40: IntSumOp::subf({{ ~Ra }}, {{ Rb }}, {{ 1 }});
75: IntArithOp::mulhw({{
75: IntArithCheckRcOp::mulhw({{
int64_t prod = Ra_sd * Rb_sd;
Rt = prod >> 32;
}});
@@ -497,28 +503,21 @@ decode PO default Unknown::unknown() {
138: adde({{ Ra }}, {{ Rb }}, {{ xer.ca }}, true);
200: subfze({{ ~Ra }}, {{ xer.ca }}, computeCA = true);
202: addze({{ Ra }}, {{ xer.ca }}, computeCA = true);
232: subfme({{ ~Ra }}, {{ (uint32_t)-1 }}, {{ xer.ca }}, true);
234: addme({{ Ra }}, {{ (uint32_t)-1 }}, {{ xer.ca }}, true);
232: subfme({{ ~Ra }}, {{ -1ULL }}, {{ xer.ca }}, true);
234: addme({{ Ra }}, {{ -1ULL }}, {{ xer.ca }}, true);
}
format IntArithOp {
235: mullw({{
int64_t prod = Ra_sd * Rb_sd;
Rt = prod;
}});
// Another variant of mullw decoded with the OE bit set
747: mullwo({{
int64_t src1 = Ra_sd;
int64_t src2 = Rb;
int64_t prod = src1 * src2;
Rt = prod;
}}, true);
}
235: IntArithCheckRcOp::mullw({{
int64_t prod = Ra_sd * Rb_sd;
Rt = prod;
if (prod != (int32_t)prod) {
setOV = true;
}
}}, true);
266: IntSumOp::add({{ Ra }}, {{ Rb }});
format IntArithOp {
format IntArithCheckRcOp {
459: divwu({{
uint32_t src1 = Ra_sw;
uint32_t src2 = Rb_sw;
@@ -526,18 +525,7 @@ decode PO default Unknown::unknown() {
Rt = src1 / src2;
} else {
Rt = 0;
}
}});
// Another variant of divwu decoded with the OE bit set
971: divwuo({{
uint32_t src1 = Ra_sw;
uint32_t src2 = Rb_sw;
if (src2 != 0) {
Rt = src1 / src2;
} else {
Rt = 0;
divSetOV = true;
setOV = true;
}
}}, true);
@@ -549,19 +537,7 @@ decode PO default Unknown::unknown() {
Rt = src1 / src2;
} else {
Rt = 0;
}
}});
// Another variant of divw decoded with the OE bit set
1003: divwo({{
int32_t src1 = Ra_sw;
int32_t src2 = Rb_sw;
if ((src1 != 0x80000000 || src2 != 0xffffffff)
&& src2 != 0) {
Rt = src1 / src2;
} else {
Rt = 0;
divSetOV = true;
setOV = true;
}
}}, true);
}

View File

@@ -71,7 +71,7 @@ def format FloatRCCheckOp(code, inst_flags = []) {{
# Generate the second class
(header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
GenAluOp(name, Name + 'RcSet', 'FloatOp', code_rc1, inst_flags,
CheckRcDecode, IntRcConstructor)
CheckRcDecode, BasicConstructor)
# Finally, add to the other outputs
header_output += header_output_rc1
@@ -96,7 +96,7 @@ def format FloatArithOp(code, inst_flags = []) {{
# Generate the second class
(header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
GenAluOp(name, Name + 'RcSet', 'FloatOp', code_rc1, inst_flags,
CheckRcDecode, IntRcConstructor)
CheckRcDecode, BasicConstructor)
# Finally, add to the other outputs
header_output += header_output_rc1
@@ -121,7 +121,7 @@ def format FloatConvertOp(code, inst_flags = []) {{
# Generate the second class
(header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
GenAluOp(name, Name + 'RcSet', 'FloatOp', code_rc1, inst_flags,
CheckRcDecode, IntRcConstructor)
CheckRcDecode, BasicConstructor)
# Finally, add to the other outputs
header_output += header_output_rc1

View File

@@ -31,44 +31,6 @@
// Integer ALU instructions
//
// Instruction class constructor template when Rc is set.
def template IntRcConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst) :
%(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
{
%(set_reg_idx_arr)s;
%(constructor)s;
rcSet = true;
}
}};
// Instruction class constructor template when OE is set.
def template IntOeConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst) :
%(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
{
%(set_reg_idx_arr)s;
%(constructor)s;
oeSet = true;
}
}};
// Instruction class constructor template when both Rc and OE are set.
def template IntRcOeConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst) :
%(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
{
%(set_reg_idx_arr)s;
%(constructor)s;
rcSet = true;
oeSet = true;
}
}};
let {{
readXERCode = 'Xer xer = XER;'
@@ -98,17 +60,12 @@ computeOVCode = '''
}
'''
computeDivOVCode = '''
if (divSetOV) {
setOVCode = '''
if (setOV) {
xer.ov = 1;
xer.so = 1;
} else {
if (findOverflow(32, %(result)s, %(inputa)s, %(inputb)s)) {
xer.ov = 1;
xer.so = 1;
} else {
xer.ov = 0;
}
xer.ov = 0;
}
'''
@@ -136,21 +93,23 @@ def format IntImmOp(code, inst_flags = []) {{
// value in source register Ra, hence the use of src to hold the actual
// value. The control flags include the use of code to compute the
// carry bit or the CR0 code.
def format IntImmArithOp(code, ctrl_flags = [], inst_flags = []) {{
def format IntImmArithOp(code, computeCA = 0, computeCR0 = 0,
inst_flags = []) {{
# Set up the dictionary and deal with control flags
dict = {'result':'Rt', 'inputa':'src', 'inputb':'imm'}
if ctrl_flags:
# Set up the dictionary
dict = {'result':'Rt', 'inputa':'src', 'inputb':'si'}
# Deal with computing CR0 and carry
if computeCA or computeCR0:
code += readXERCode
for val in ctrl_flags:
if val == 'computeCA':
code += computeCACode % dict + setXERCode
elif val == 'computeCR0':
code += computeCR0Code % dict
if computeCA:
code += computeCACode % dict + setXERCode
if computeCR0:
code += computeCR0Code % dict
# Generate the class
(header_output, decoder_output, decode_block, exec_output) = \
GenAluOp(name, Name, 'IntImmOp', code, inst_flags, BasicDecode,
GenAluOp(name, Name, 'IntImmArithOp', code, inst_flags, BasicDecode,
BasicConstructor)
}};
@@ -163,12 +122,12 @@ def format IntImmArithCheckRaOp(code, code_ra0, inst_flags = []) {{
# First the version where Ra is non-zero
(header_output, decoder_output, decode_block, exec_output) = \
GenAluOp(name, Name, 'IntImmOp', code, inst_flags,
GenAluOp(name, Name, 'IntImmArithOp', code, inst_flags,
CheckRaDecode, BasicConstructor)
# Now another version where Ra == 0
(header_output_ra0, decoder_output_ra0, _, exec_output_ra0) = \
GenAluOp(name, Name + 'RaZero', 'IntImmOp', code_ra0, inst_flags,
GenAluOp(name, Name + 'RaZero', 'IntImmArithOp', code_ra0, inst_flags,
CheckRaDecode, BasicConstructor)
# Finally, add to the other outputs
@@ -213,7 +172,7 @@ def format IntLogicOp(code, inst_flags = []) {{
# Generate the second class
(header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
GenAluOp(name, Name + 'RcSet', 'IntOp', code_rc1, inst_flags,
CheckRcDecode, IntRcConstructor)
CheckRcDecode, BasicConstructor)
# Finally, add to the other outputs
header_output += header_output_rc1
@@ -238,7 +197,7 @@ def format IntShiftOp(code, inst_flags = []) {{
# Generate the second class
(header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
GenAluOp(name, Name + 'RcSet', 'IntShiftOp', code_rc1, inst_flags,
CheckRcDecode, IntRcConstructor)
CheckRcDecode, BasicConstructor)
# Finally, add to the other outputs
header_output += header_output_rc1
@@ -264,9 +223,9 @@ def format IntSumOp(src1, src2, ca = {{ 0 }}, computeCA = 0,
dict = {'result':'Rt', 'inputa':'src1', 'inputb':'src2'}
# Add code to set up variables and do the sum
code = 'uint32_t src1 = ' + src1 + ';\n'
code += 'uint32_t src2 = ' + src2 + ';\n'
code += 'uint32_t ca = ' + ca + ';\n'
code = 'uint64_t src1 = ' + src1 + ';\n'
code += 'uint64_t src2 = ' + src2 + ';\n'
code += 'uint64_t ca = ' + ca + ';\n'
code += 'Rt = src1 + src2 + ca;\n'
# Add code for calculating the carry, if needed
@@ -284,17 +243,17 @@ def format IntSumOp(src1, src2, ca = {{ 0 }}, computeCA = 0,
# Generate the classes
(header_output, decoder_output, decode_block, exec_output) = \
GenAluOp(name, Name, 'IntOp', code, inst_flags,
GenAluOp(name, Name, 'IntArithOp', code, inst_flags,
CheckRcOeDecode, BasicConstructor)
(header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
GenAluOp(name, Name + 'RcSet', 'IntOp', code_rc1, inst_flags,
CheckRcOeDecode, IntRcConstructor)
GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags,
CheckRcOeDecode, BasicConstructor)
(header_output_oe1, decoder_output_oe1, _, exec_output_oe1) = \
GenAluOp(name, Name + 'OeSet', 'IntOp', code_oe1, inst_flags,
CheckRcOeDecode, IntOeConstructor)
GenAluOp(name, Name + 'OeSet', 'IntArithOp', code_oe1, inst_flags,
CheckRcOeDecode, BasicConstructor)
(header_output_rc1_oe1, decoder_output_rc1_oe1, _, exec_output_rc1_oe1) = \
GenAluOp(name, Name + 'RcSetOeSet', 'IntOp', code_rc1_oe1,
inst_flags, CheckRcOeDecode, IntRcOeConstructor)
GenAluOp(name, Name + 'RcSetOeSet', 'IntArithOp', code_rc1_oe1,
inst_flags, CheckRcOeDecode, BasicConstructor)
# Finally, add to the other outputs
header_output += \
@@ -309,39 +268,69 @@ def format IntSumOp(src1, src2, ca = {{ 0 }}, computeCA = 0,
// Instructions that use source registers Ra and Rb, with the result
// placed into Rt. Basically multiply and divide instructions. The
// carry bit is never set, but overflow can be calculated. Division
// explicitly sets the overflow bit in certain situations and this is
// dealt with using the 'divSetOV' boolean in decoder.isa. We generate
// two versions of each instruction to deal with the Rc bit.
def format IntArithOp(code, computeOV = 0, inst_flags = []) {{
// carry bit is never set, but overflow can be calculated. In certain
// situations, the overflow bits have to be set and this is dealt with
// using the 'setOV' boolean in decoder.isa.
//
// In case overflow is to be calculated, we generate four versions of
// each instruction to deal with different combinations of having the
// OE bit set or unset and the Rc bit set or unset too. Otherwise, we
// generate two versions of each instruction to deal with the Rc bit.
def format IntArithCheckRcOp(code, computeOV = 0, inst_flags = []) {{
# The result is always in Rt, but the source values vary
dict = {'result':'Rt', 'inputa':'src1', 'inputb':'src2'}
# Deal with setting the overflow flag
if computeOV:
code = 'bool divSetOV = false;\n' + code
code += computeDivOVCode % dict + setXERCode
# Setup the 4 code versions and add code to access XER if necessary
code = 'GEM5_VAR_USED bool setOV = false;\n' + code
code_rc1 = readXERCode + code + computeCR0Code % dict
code_oe1 = readXERCode + code + setOVCode + setXERCode
code_rc1_oe1 = readXERCode + code + setOVCode + setXERCode
code_rc1_oe1 += computeCR0Code % dict
# Setup the 2 code versions and add code to access XER if necessary
code_rc1 = readXERCode + code + computeCR0Code % dict
if computeOV:
code = readXERCode + code
# Generate the classes
(header_output, decoder_output, decode_block, exec_output) = \
GenAluOp(name, Name, 'IntArithOp', code, inst_flags,
CheckRcOeDecode, BasicConstructor)
(header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags,
CheckRcOeDecode, BasicConstructor)
(header_output_oe1, decoder_output_oe1, _, exec_output_oe1) = \
GenAluOp(name, Name + 'OeSet', 'IntArithOp', code_oe1, inst_flags,
CheckRcOeDecode, BasicConstructor)
(header_output_rc1_oe1, decoder_output_rc1_oe1, _,
exec_output_rc1_oe1) = \
GenAluOp(name, Name + 'RcSetOeSet', 'IntArithOp', code_rc1_oe1,
inst_flags, CheckRcOeDecode, BasicConstructor)
# Generate the classes
(header_output, decoder_output, decode_block, exec_output) = \
GenAluOp(name, Name, 'IntOp', code, inst_flags,
CheckRcDecode, BasicConstructor)
# Finally, add to the other outputs
header_output += \
header_output_rc1 + header_output_oe1 + header_output_rc1_oe1
decoder_output += \
decoder_output_rc1 + decoder_output_oe1 + decoder_output_rc1_oe1
exec_output += \
exec_output_rc1 + exec_output_oe1 + exec_output_rc1_oe1
# Generate the second class
(header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
GenAluOp(name, Name + 'RcSet', 'IntOp', code_rc1, inst_flags,
CheckRcDecode, IntRcConstructor)
else:
# Setup the 2 code versions and add code to access XER if necessary
code_rc1 = readXERCode + code + computeCR0Code % dict
# Finally, add to the other outputs
header_output += header_output_rc1
decoder_output += decoder_output_rc1
exec_output += exec_output_rc1
# Generate the first class
(header_output, decoder_output, decode_block, exec_output) = \
GenAluOp(name, Name, 'IntArithOp', code, inst_flags,
CheckRcDecode, BasicConstructor)
# Generate the second class
(header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags,
CheckRcDecode, BasicConstructor)
# Finally, add to the other outputs
header_output += header_output_rc1
decoder_output += decoder_output_rc1
exec_output += exec_output_rc1
}};
@@ -364,7 +353,7 @@ def format IntRotateOp(code, inst_flags = []) {{
# Generate the second class
(header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
GenAluOp(name, Name + 'RcSet', 'IntRotateOp', code_rc1, inst_flags,
CheckRcDecode, IntRcConstructor)
CheckRcDecode, BasicConstructor)
# Finally, add to the other outputs
header_output += header_output_rc1

View File

@@ -52,6 +52,7 @@ BitUnion32(ExtMachInst)
// Immediate fields
Bitfield<15, 0> si;
Bitfield<15, 0> ui;
Bitfield<15, 0> d;
Bitfield<15, 2> ds;
@@ -70,7 +71,7 @@ BitUnion32(ExtMachInst)
Bitfield<0> lk;
// Record bits
Bitfield<0> rc31;
Bitfield<0> rc;
Bitfield<10> oe;
// Condition register fields