From 0fef2300c06af42876889676aade4c264675ca5c Mon Sep 17 00:00:00 2001 From: Roger Chang Date: Mon, 12 Jun 2023 14:06:57 +0800 Subject: [PATCH] arch-riscv: Refactor fmax and fmin instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the fmax and fmin instructions convert source float registers such as Fs1_bits to float64_t (or float32_t and float16_t) many times within a single instruction. This repetition makes the instructions harder to maintain. This change adds non-register float_t intermediate variables fs1 and fs2 to hold the converted results so that we don’t need to convert them repeatedly. It also adds an intermediate variable fd of the specific float type so that the upper bits of the packed float register are assumed to be all ones. Change-Id: Ic508d5255db6c4b38ca4df6dd805df440c043fff Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71479 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- src/arch/riscv/isa/decoder.isa | 121 +++++++++++++++------------------ 1 file changed, 54 insertions(+), 67 deletions(-) diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 2dcd118225..a339c11375 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -1615,93 +1615,80 @@ decode QUADRANT default Unknown::unknown() { } 0x14: decode ROUND_MODE { 0x0: fmin_s({{ - bool less = f32_lt_quiet(f32(freg(Fs1_bits)), - f32(freg(Fs2_bits))) || - (f32_eq(f32(freg(Fs1_bits)), - f32(freg(Fs2_bits))) && - bits(f32(freg(Fs1_bits)).v, 31)); + float32_t fs1 = f32(freg(Fs1_bits)); + float32_t fs2 = f32(freg(Fs2_bits)); + float32_t fd; + bool less = f32_lt_quiet(fs1, fs2) || + (f32_eq(fs1, fs2) && bits(fs1.v, 31)); - Fd_bits = less || - isNaNF32UI(f32(freg(Fs2_bits)).v) ? 
- freg(Fs1_bits).v : freg(Fs2_bits).v; - if (isNaNF32UI(f32(freg(Fs1_bits)).v) && - isNaNF32UI(f32(freg(Fs2_bits)).v)) - Fd_bits = f32(defaultNaNF32UI).v; + fd = less || isNaNF32UI(fs2.v) ? fs1 : fs2; + if (isNaNF32UI(fs1.v) && isNaNF32UI(fs2.v)) + fd = f32(defaultNaNF32UI); + Fd_bits = freg(fd).v; }}, FloatCmpOp); 0x1: fmax_s({{ - bool greater = f32_lt_quiet(f32(freg(Fs2_bits)), - f32(freg(Fs1_bits))) || - (f32_eq(f32(freg(Fs2_bits)), - f32(freg(Fs1_bits))) && - bits(f32(freg(Fs2_bits)).v, 31)); + float32_t fs1 = f32(freg(Fs1_bits)); + float32_t fs2 = f32(freg(Fs2_bits)); + float32_t fd; + bool greater = f32_lt_quiet(fs2, fs1) || + (f32_eq(fs2, fs1) && bits(fs2.v, 31)); - Fd_bits = greater || - isNaNF32UI(f32(freg(Fs2_bits)).v) ? - freg(Fs1_bits).v : freg(Fs2_bits).v; - if (isNaNF32UI(f32(freg(Fs1_bits)).v) && - isNaNF32UI(f32(freg(Fs2_bits)).v)) - Fd_bits = f32(defaultNaNF32UI).v; + fd = greater || isNaNF32UI(fs2.v) ? fs1: fs2; + if (isNaNF32UI(fs1.v) && isNaNF32UI(fs2.v)) + fd = f32(defaultNaNF32UI); + Fd_bits = freg(fd).v; }}, FloatCmpOp); } 0x15: decode ROUND_MODE { 0x0: fmin_d({{ - bool less = f64_lt_quiet(f64(freg(Fs1_bits)), - f64(freg(Fs2_bits))) || - (f64_eq(f64(freg(Fs1_bits)), - f64(freg(Fs2_bits))) && - bits(f64(freg(Fs1_bits)).v, 63)); + float64_t fs1 = f64(freg(Fs1_bits)); + float64_t fs2 = f64(freg(Fs2_bits)); + float64_t fd; + bool less = f64_lt_quiet(fs1, fs2) || + (f64_eq(fs1, fs2) && bits(fs1.v, 63)); - Fd_bits = less || - isNaNF64UI(f64(freg(Fs2_bits)).v) ? - freg(Fs1_bits).v : freg(Fs2_bits).v; - if (isNaNF64UI(f64(freg(Fs1_bits)).v) && - isNaNF64UI(f64(freg(Fs2_bits)).v)) - Fd_bits = f64(defaultNaNF64UI).v; + fd = less || isNaNF64UI(fs2.v) ? 
fs1 : fs2; + if (isNaNF64UI(fs1.v) && isNaNF64UI(fs2.v)) + fd = f64(defaultNaNF64UI); + Fd_bits = freg(fd).v; }}, FloatCmpOp); 0x1: fmax_d({{ - bool greater = - f64_lt_quiet(f64(freg(Fs2_bits)), - f64(freg(Fs1_bits))) || - (f64_eq(f64(freg(Fs2_bits)), - f64(freg(Fs1_bits))) && - bits(f64(freg(Fs2_bits)).v, 63)); + float64_t fs1 = f64(freg(Fs1_bits)); + float64_t fs2 = f64(freg(Fs2_bits)); + float64_t fd; + bool greater = f64_lt_quiet(fs2, fs1) || + (f64_eq(fs2, fs1) && bits(fs2.v, 63)); - Fd_bits = greater || - isNaNF64UI(f64(freg(Fs2_bits)).v) ? - freg(Fs1_bits).v : freg(Fs2_bits).v; - if (isNaNF64UI(f64(freg(Fs1_bits)).v) && - isNaNF64UI(f64(Fs2_bits).v)) - Fd_bits = f64(defaultNaNF64UI).v; + fd = greater || isNaNF64UI(fs2.v) ? fs1 : fs2; + if (isNaNF64UI(fs1.v) && isNaNF64UI(fs2.v)) + fd = f64(defaultNaNF64UI); + Fd_bits = freg(fd).v; }}, FloatCmpOp); } 0x16: decode ROUND_MODE { 0x0: fmin_h({{ - bool less = f16_lt_quiet(f16(freg(Fs1_bits)), - f16(freg(Fs2_bits))) || - (f16_eq(f16(freg(Fs1_bits)), - f16(freg(Fs2_bits))) && - bits(f16(freg(Fs1_bits)).v, 15)); + float16_t fs1 = f16(freg(Fs1_bits)); + float16_t fs2 = f16(freg(Fs2_bits)); + float16_t fd; + bool less = f16_lt_quiet(fs1, fs2) || + (f16_eq(fs1, fs2) && bits(fs1.v, 15)); - Fd_bits = less || - isNaNF16UI(f16(freg(Fs2_bits)).v) ? - freg(Fs1_bits).v : freg(Fs2_bits).v; - if (isNaNF16UI(f16(freg(Fs1_bits)).v) && - isNaNF16UI(f16(freg(Fs2_bits)).v)) - Fd_bits = f16(defaultNaNF16UI).v; + fd = less || isNaNF16UI(fs2.v) ? 
fs1 : fs2; + if (isNaNF16UI(fs1.v) && isNaNF16UI(fs2.v)) + fd = f16(defaultNaNF16UI); + Fd_bits = freg(fd).v; }}, FloatCmpOp); 0x1: fmax_h({{ - bool greater = f16_lt_quiet(f16(freg(Fs2_bits)), - f16(freg(Fs1_bits))) || - (f16_eq(f16(freg(Fs2_bits)), - f16(freg(Fs1_bits))) && - bits(f16(freg(Fs2_bits)).v, 15)); + float16_t fs1 = f16(freg(Fs1_bits)); + float16_t fs2 = f16(freg(Fs2_bits)); + float16_t fd; + bool greater = f16_lt_quiet(fs2, fs1) || + (f16_eq(fs2, fs1) && bits(fs2.v, 15)); - Fd_bits = greater || - isNaNF16UI(f16(freg(Fs2_bits)).v) ? - freg(Fs1_bits).v : freg(Fs2_bits).v; - if (isNaNF16UI(f16(freg(Fs1_bits)).v) && - isNaNF16UI(f16(freg(Fs2_bits)).v)) - Fd_bits = f16(defaultNaNF16UI).v; + fd = greater || isNaNF16UI(fs2.v) ? fs1 : fs2; + if (isNaNF16UI(fs1.v) && isNaNF16UI(fs2.v)) + fd = f16(defaultNaNF16UI); + Fd_bits = freg(fd).v; }}, FloatCmpOp); } 0x20: decode CONV_SGN {