From 82675648c874d6f9d00b3cb46c896dda618c4961 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Mon, 9 Oct 2023 18:23:40 +0100 Subject: [PATCH] cpu: Implement a CapstoneDisassembler Capstone is an open source disassembler [1] already used by other projects (like QEMU). gem5 is already capable of disassembling instructions. Every StaticInst is supposed to define a generateDisassembly method which returns the instruction mnemonic (opcode + operand list) as a string. This "distributed" implementation of a disassembler relies on the developer to properly populate the metadata fields of the base instruction class. The growing complexity of the ISA code and the massive reuse of base classes beyond their intended use has led to a disassembling logic which contains several bugs. By allowing a tracer to rely on a third party disassembler, we fill the instruction trace with a more trustworthy instruction stream. This will make any trace parsing tool work better and it will also allow us to spot/fix our own bugs by comparing instruction traces with native vs custom disassembler [1]: http://www.capstone-engine.org/ Change-Id: I3c4db5072c03d2731265d0398d3863c101dcb180 Signed-off-by: Giacomo Travaglini --- src/cpu/Capstone.py | 45 +++++++++++++++++++ src/cpu/SConscript | 5 ++- src/cpu/SConsopts | 50 ++++++++++++++++++++++ src/cpu/capstone.cc | 90 ++++++++++++++++++++++++++++++++++++++ src/cpu/capstone.hh | 102 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 src/cpu/Capstone.py create mode 100644 src/cpu/SConsopts create mode 100644 src/cpu/capstone.cc create mode 100644 src/cpu/capstone.hh diff --git a/src/cpu/Capstone.py b/src/cpu/Capstone.py new file mode 100644 index 0000000000..4b6b5fd84a --- /dev/null +++ b/src/cpu/Capstone.py @@ -0,0 +1,45 @@ +# Copyright (c) 2023 Arm Limited +# All rights reserved.
+# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from m5.SimObject import SimObject +from m5.params import * +from m5.objects.InstTracer import InstDisassembler + + +class CapstoneDisassembler(InstDisassembler): + type = "CapstoneDisassembler" + cxx_class = "gem5::trace::CapstoneDisassembler" + cxx_header = "cpu/capstone.hh" + abstract = True diff --git a/src/cpu/SConscript b/src/cpu/SConscript index d6dcd2f6ea..03ba7b924d 100644 --- a/src/cpu/SConscript +++ b/src/cpu/SConscript @@ -1,6 +1,6 @@ # -*- mode:python -*- -# Copyright (c) 2020 ARM Limited +# Copyright (c) 2020, 2023 Arm Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -115,6 +115,9 @@ Source('simple_thread.cc') Source('thread_context.cc') Source('thread_state.cc') Source('timing_expr.cc') +SourceLib('capstone', tags='capstone') +Source('capstone.cc', tags='capstone') +SimObject('Capstone.py', sim_objects=['CapstoneDisassembler'], tags='capstone') SimObject('DummyChecker.py', sim_objects=['DummyChecker']) Source('checker/cpu.cc') diff --git a/src/cpu/SConsopts b/src/cpu/SConsopts new file mode 100644 index 0000000000..94e55ece32 --- /dev/null +++ b/src/cpu/SConsopts @@ -0,0 +1,50 @@ +# Copyright (c) 2023 Arm Limited +# All rights reserved. 
+# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Import('*') + +from gem5_scons import warning + +import gem5_scons + +with gem5_scons.Configure(main) as conf: + # Check for the capstone header (capstone/capstone.h) + conf.env['CONF']['HAVE_CAPSTONE'] = conf.CheckHeader('capstone/capstone.h', '<>') + + if conf.env['CONF']['HAVE_CAPSTONE']: + conf.env.TagImplies('capstone', 'gem5 lib') + else: + warning("Header file not found.\n" + "This host has no capstone library installed.") diff --git a/src/cpu/capstone.cc b/src/cpu/capstone.cc new file mode 100644 index 0000000000..4c2896312d --- /dev/null +++ b/src/cpu/capstone.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2023 Arm Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "cpu/capstone.hh" + +#include "base/output.hh" + +namespace gem5 +{ + +namespace trace +{ + +std::string +CapstoneDisassembler::disassemble(StaticInstPtr inst, + const PCStateBase &pc, + const loader::SymbolTable *symtab) const +{ + std::string inst_dist; + if (inst->isPseudo() || inst->isMicroop()) { + // Capstone doesn't have any visibility over microops nor over + // gem5 pseudo ops. 
Use native disassembler instead + inst_dist = InstDisassembler::disassemble(inst, pc, symtab); + } else { + // Stripping the extended fields from the ExtMachInst + auto mach_inst = inst->getEMI() & mask(inst->size() * 8); + + cs_insn *insn; + // capstone disassembler + if (const csh *curr_handle = currHandle(pc); curr_handle != nullptr) { + size_t count = cs_disasm(*curr_handle, (uint8_t*)&mach_inst, + inst->size(), 0, 0, &insn); + + // As we are passing only one instruction, we are expecting one instruction only + // being disassembled + assert(count <= 1); + + for (int idx = 0; idx < count; idx++) { + inst_dist += csprintf(" %s %s", insn[idx].mnemonic, insn[idx].op_str); + } + } else { + // No valid handle; return an invalid string + inst_dist += " capstone failure"; + } + } + + return inst_dist; +} + +CapstoneDisassembler::CapstoneDisassembler(const Params &p) + : InstDisassembler(p) +{ +} + +} // namespace trace +} // namespace gem5 diff --git a/src/cpu/capstone.hh b/src/cpu/capstone.hh new file mode 100644 index 0000000000..1a197e5086 --- /dev/null +++ b/src/cpu/capstone.hh @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2023 Arm Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CPU_CAPSTONE_HH__ +#define __CPU_CAPSTONE_HH__ + +#include + +#include "params/CapstoneDisassembler.hh" +#include "sim/insttracer.hh" + +namespace gem5 +{ + +class ThreadContext; + +namespace trace { + +/** + * Capstone Disassembler: + * The disassembler relies on the capstone library to convert + * the StaticInst encoding into the disassembled string. 
+ * + * One thing to keep in mind is that the disassembled + * instruction might not coincide with the instruction being + * decoded + executed in gem5. This could be the case if + * there was a bug in either gem5 or in capstone itself. + * This scenario is not possible with the native gem5 disassembler + * as the instruction mnemonic is tightly coupled with the + * decoded(=generated) instruction (you print what you decode) + * + * The Capstone dispatches to the native disassembler in + * two cases: + * + * a) m5 pseudo ops + * b) micro-ops + */ +class CapstoneDisassembler : public InstDisassembler +{ + public: + PARAMS(CapstoneDisassembler); + CapstoneDisassembler(const Params &p); + + std::string + disassemble(StaticInstPtr inst, + const PCStateBase &pc, + const loader::SymbolTable *symtab) const override; + + protected: + + /** + * Return a pointer to the current capstone handle (csh). + * + * Any ISA extension of the Capstone disassembler should + * initialize (with cs_open) one or more capstone handles + * at construction time. + * (You might need more than one handle in case the ISA + * has more than one mode of operation, e.g. arm and arm64) + * The current handle in use should be returned every time + * the currHandle is called. + */ + virtual const csh* currHandle(const PCStateBase &pc) const = 0; +}; + +} // namespace trace +} // namespace gem5 + +#endif // __CPU_CAPSTONE_HH__