diff --git a/lldb/examples/python/filter_disasm.py b/lldb/examples/python/filter_disasm.py new file mode 100644 index 0000000000000..adb3455209055 --- /dev/null +++ b/lldb/examples/python/filter_disasm.py @@ -0,0 +1,87 @@ +""" +Defines a command, fdis, that does filtered disassembly. The command does the +lldb disassemble command with -b and any other arguments passed in, and +pipes that through a provided filter program. + +The intention is to support disassembly of RISC-V proprietary instructions. +This is handled with llvm-objdump by piping the output of llvm-objdump through +a filter program. This script is intended to mimic that workflow. +""" + +import lldb +import subprocess + +filter_program = "crustfilt" + +def __lldb_init_module(debugger, dict): + debugger.HandleCommand( + 'command script add -f filter_disasm.fdis fdis') + print("Disassembly filter command (fdis) loaded") + print("Filter program set to %s" % filter_program) + + +def fdis(debugger, args, result, dict): + """ + Call the built in disassembler, then pass its output to a filter program + to add in disassembly for hidden opcodes. + Except for get and set, use the fdis command like the disassemble command. + By default, the filter program is crustfilt, from + https://github.com/quic/crustfilt . This can be changed by changing + the global variable filter_program. + + Usage: + fdis [[get] [set ] []] + + Choose one of the following: + get + Gets the current filter program + + set + Sets the current filter program. This can be an executable, which + will be found on PATH, or an absolute path. + + + If the first argument is not get or set, the args will be passed + to the disassemble command as is. + + """ + + global filter_program + args_list = args.split(' ') + result.Clear() + + if len(args_list) == 1 and args_list[0] == 'get': + result.PutCString(filter_program) + result.SetStatus(lldb.eReturnStatusSuccessFinishResult) + return + + if len(args_list) == 2 and args_list[0] == 'set': + filter_program = args_list[1] + result.PutCString("Filter program set to %s" % filter_program) + result.SetStatus(lldb.eReturnStatusSuccessFinishResult) + return + + res = lldb.SBCommandReturnObject() + debugger.GetCommandInterpreter().HandleCommand('disassemble -b ' + args, res) + if (len(res.GetError()) > 0): + result.SetError(res.GetError()) + result.SetStatus(lldb.eReturnStatusFailed) + return + output = res.GetOutput() + + try: + proc = subprocess.run([filter_program], capture_output=True, text=True, input=output) + except (subprocess.SubprocessError, OSError) as e: + result.PutCString("Error occurred. Original disassembly:\n\n" + output) + result.SetError(str(e)) + result.SetStatus(lldb.eReturnStatusFailed) + return + + print(proc.stderr) + if proc.stderr: + pass + #result.SetError(proc.stderr) + #result.SetStatus(lldb.eReturnStatusFailed) + else: + result.PutCString(proc.stdout) + result.SetStatus(lldb.eReturnStatusSuccessFinishResult) diff --git a/lldb/include/lldb/Core/Opcode.h b/lldb/include/lldb/Core/Opcode.h index f72f2687b54fe..88ef17093d3f3 100644 --- a/lldb/include/lldb/Core/Opcode.h +++ b/lldb/include/lldb/Core/Opcode.h @@ -200,6 +200,7 @@ class Opcode { } int Dump(Stream *s, uint32_t min_byte_width); + int DumpRISCV(Stream *s, uint32_t min_byte_width); const void *GetOpcodeBytes() const { return ((m_type == Opcode::eTypeBytes) ? m_data.inst.bytes : nullptr); diff --git a/lldb/source/Core/Disassembler.cpp b/lldb/source/Core/Disassembler.cpp index 833e327579a29..f95e446448036 100644 --- a/lldb/source/Core/Disassembler.cpp +++ b/lldb/source/Core/Disassembler.cpp @@ -658,8 +658,13 @@ void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size, // the byte dump to be able to always show 15 bytes (3 chars each) plus a // space if (max_opcode_byte_size > 0) - m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1); - else + // make RISC-V opcode dump look like llvm-objdump + if (exe_ctx && + exe_ctx->GetTargetSP()->GetArchitecture().GetTriple().isRISCV()) + m_opcode.DumpRISCV(&ss, max_opcode_byte_size * 3 + 1); + else + m_opcode.Dump(&ss, max_opcode_byte_size * 3 + 1); + else m_opcode.Dump(&ss, 15 * 3 + 1); } else { // Else, we have ARM or MIPS which can show up to a uint32_t 0x00000000 @@ -685,10 +690,13 @@ void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size, } } const size_t opcode_pos = ss.GetSizeOfLastLine(); - const std::string &opcode_name = + std::string &opcode_name = show_color ? m_markup_opcode_name : m_opcode_name; const std::string &mnemonics = show_color ? m_markup_mnemonics : m_mnemonics; + if (opcode_name.empty()) + opcode_name = ""; + // The default opcode size of 7 characters is plenty for most architectures // but some like arm can pull out the occasional vqrshrun.s16. We won't get // consistent column spacing in these cases, unfortunately. Also note that we diff --git a/lldb/source/Core/Opcode.cpp b/lldb/source/Core/Opcode.cpp index 3e30d98975d8a..dbcd18cc0d8d2 100644 --- a/lldb/source/Core/Opcode.cpp +++ b/lldb/source/Core/Opcode.cpp @@ -78,6 +78,44 @@ lldb::ByteOrder Opcode::GetDataByteOrder() const { return eByteOrderInvalid; } +// make RISC-V byte dumps look like llvm-objdump, instead of just dumping bytes +int Opcode::DumpRISCV(Stream *s, uint32_t min_byte_width) { + const uint32_t previous_bytes = s->GetWrittenBytes(); + // if m_type is not bytes, call Dump + if (m_type != Opcode::eTypeBytes) + return Dump(s, min_byte_width); + + // from RISCVPrettyPrinter in llvm-objdump.cpp + // if size % 4 == 0, print as 1 or 2 32 bit values (32 or 64 bit inst) + // else if size % 2 == 0, print as 1 or 3 16 bit values (16 or 48 bit inst) + // else fall back and print bytes + for (uint32_t i = 0; i < m_data.inst.length;) { + if (i > 0) + s->PutChar(' '); + if (!(m_data.inst.length % 4)) { + s->Printf("%2.2x%2.2x%2.2x%2.2x", m_data.inst.bytes[i + 3], + m_data.inst.bytes[i + 2], + m_data.inst.bytes[i + 1], + m_data.inst.bytes[i + 0]); + i += 4; + } else if (!(m_data.inst.length % 2)) { + s->Printf("%2.2x%2.2x", m_data.inst.bytes[i + 1], + m_data.inst.bytes[i + 0]); + i += 2; + } else { + s->Printf("%2.2x", m_data.inst.bytes[i]); + ++i; + } + } + + uint32_t bytes_written_so_far = s->GetWrittenBytes() - previous_bytes; + // Add spaces to make sure bytes display comes out even in case opcodes aren't + // all the same size. + if (bytes_written_so_far < min_byte_width) + s->Printf("%*s", min_byte_width - bytes_written_so_far, ""); + return s->GetWrittenBytes() - previous_bytes; +} + uint32_t Opcode::GetData(DataExtractor &data) const { uint32_t byte_size = GetByteSize(); uint8_t swap_buf[8]; diff --git a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp index ed6047f8f4ef3..eeb6020abd73a 100644 --- a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp +++ b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp @@ -61,6 +61,8 @@ class DisassemblerLLVMC::MCDisasmInstance { uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc, llvm::MCInst &mc_inst) const; + bool GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len, + lldb::addr_t pc, llvm::MCInst &mc_inst, size_t &size) const; void PrintMCInst(llvm::MCInst &mc_inst, lldb::addr_t pc, std::string &inst_string, std::string &comments_string); void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style); @@ -524,11 +526,11 @@ class InstructionLLVMC : public lldb_private::Instruction { const addr_t pc = m_address.GetFileAddress(); llvm::MCInst inst; - const size_t inst_size = - mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); - if (inst_size == 0) - m_opcode.Clear(); - else { + size_t inst_size = 0; + m_is_valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, + pc, inst, inst_size); + m_opcode.Clear(); + if (inst_size != 0) { m_opcode.SetOpcodeBytes(opcode_data, inst_size); m_is_valid = true; } @@ -604,10 +606,11 @@ class InstructionLLVMC : public lldb_private::Instruction { const uint8_t *opcode_data = data.GetDataStart(); const size_t opcode_data_len = data.GetByteSize(); llvm::MCInst inst; - size_t inst_size = - mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); - - if (inst_size > 0) { + size_t inst_size = 0; + bool valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, + inst, inst_size); + + if (valid && inst_size > 0) { mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style); const bool saved_use_color = mc_disasm_ptr->GetUseColor(); @@ -1206,9 +1209,10 @@ class InstructionLLVMC : public lldb_private::Instruction { const uint8_t *opcode_data = data.GetDataStart(); const size_t opcode_data_len = data.GetByteSize(); llvm::MCInst inst; - const size_t inst_size = - mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); - if (inst_size == 0) + size_t inst_size = 0; + const bool valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, + pc, inst, inst_size); + if (!valid) return; m_has_visited_instruction = true; @@ -1337,19 +1341,18 @@ DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance( m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up); } -uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst( +bool DisassemblerLLVMC::MCDisasmInstance::GetMCInst( const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc, - llvm::MCInst &mc_inst) const { + llvm::MCInst &mc_inst, size_t &size) const { llvm::ArrayRef data(opcode_data, opcode_data_len); llvm::MCDisassembler::DecodeStatus status; - uint64_t new_inst_size; - status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc, + status = m_disasm_up->getInstruction(mc_inst, size, data, pc, llvm::nulls()); if (status == llvm::MCDisassembler::Success) - return new_inst_size; + return true; else - return 0; + return false; } void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst( diff --git a/lldb/source/Utility/ArchSpec.cpp b/lldb/source/Utility/ArchSpec.cpp index 70b9800f4dade..7c71aaae6bcf2 100644 --- a/lldb/source/Utility/ArchSpec.cpp +++ b/lldb/source/Utility/ArchSpec.cpp @@ -228,9 +228,9 @@ static const CoreDefinition g_core_definitions[] = { {eByteOrderLittle, 4, 4, 4, llvm::Triple::hexagon, ArchSpec::eCore_hexagon_hexagonv5, "hexagonv5"}, - {eByteOrderLittle, 4, 2, 4, llvm::Triple::riscv32, ArchSpec::eCore_riscv32, + {eByteOrderLittle, 4, 2, 8, llvm::Triple::riscv32, ArchSpec::eCore_riscv32, "riscv32"}, - {eByteOrderLittle, 8, 2, 4, llvm::Triple::riscv64, ArchSpec::eCore_riscv64, + {eByteOrderLittle, 8, 2, 8, llvm::Triple::riscv64, ArchSpec::eCore_riscv64, "riscv64"}, {eByteOrderLittle, 4, 4, 4, llvm::Triple::loongarch32,