mirror of
https://github.com/espressif/binutils-gdb.git
synced 2025-06-17 16:05:56 +08:00
gdb: disassembler opcode display formatting
This commit changes the format of 'disassemble /r' to match GNU objdump. Specifically, GDB will now display the instruction bytes in as 'objdump --wide --disassemble' does. Here is an example for RISC-V before this patch: (gdb) disassemble /r 0x0001018e,0x0001019e Dump of assembler code from 0x1018e to 0x1019e: 0x0001018e <call_me+66>: 03 26 84 fe lw a2,-24(s0) 0x00010192 <call_me+70>: 83 25 c4 fe lw a1,-20(s0) 0x00010196 <call_me+74>: 61 65 lui a0,0x18 0x00010198 <call_me+76>: 13 05 85 6a addi a0,a0,1704 0x0001019c <call_me+80>: f1 22 jal 0x10368 <printf> End of assembler dump. And here's an example after this patch: (gdb) disassemble /r 0x0001018e,0x0001019e Dump of assembler code from 0x1018e to 0x1019e: 0x0001018e <call_me+66>: fe842603 lw a2,-24(s0) 0x00010192 <call_me+70>: fec42583 lw a1,-20(s0) 0x00010196 <call_me+74>: 6561 lui a0,0x18 0x00010198 <call_me+76>: 6a850513 addi a0,a0,1704 0x0001019c <call_me+80>: 22f1 jal 0x10368 <printf> End of assembler dump. There are two differences here. First, the instruction bytes after the patch are grouped based on the size of the instruction, and are byte-swapped to little-endian order. Second, after the patch, GDB now uses the bytes-per-line hint from libopcodes to add whitespace padding after the opcode bytes, this means that in most cases the instructions are nicely aligned. It is still possible for a very long instruction to intrude into the disassembled text space. The next example is x86-64, before the patch: (gdb) disassemble /r main Dump of assembler code for function main: 0x0000000000401106 <+0>: 55 push %rbp 0x0000000000401107 <+1>: 48 89 e5 mov %rsp,%rbp 0x000000000040110a <+4>: c7 87 d8 00 00 00 01 00 00 00 movl $0x1,0xd8(%rdi) 0x0000000000401114 <+14>: b8 00 00 00 00 mov $0x0,%eax 0x0000000000401119 <+19>: 5d pop %rbp 0x000000000040111a <+20>: c3 ret End of assembler dump. And after the patch: (gdb) disassemble /r main Dump of assembler code for function main: 0x0000000000401106 <+0>: 55 push %rbp 0x0000000000401107 <+1>: 48 89 e5 mov %rsp,%rbp 0x000000000040110a <+4>: c7 87 d8 00 00 00 01 00 00 00 movl $0x1,0xd8(%rdi) 0x0000000000401114 <+14>: b8 00 00 00 00 mov $0x0,%eax 0x0000000000401119 <+19>: 5d pop %rbp 0x000000000040111a <+20>: c3 ret End of assembler dump. Most instructions are aligned, except for the very long instruction. Notice too that for x86-64 libopcodes doesn't request that GDB group the instruction bytes. This matches the behaviour of objdump. In case the user really wants the old behaviour, I have added a new modifier 'disassemble /b', this displays the instruction byte at a time. For x86-64, which never groups instruction bytes, /b and /r are equivalent, but for RISC-V, using /b gets the old layout back (except that the whitespace for alignment is still present). Consider our original RISC-V example, this time using /b: (gdb) disassemble /b 0x0001018e,0x0001019e Dump of assembler code from 0x1018e to 0x1019e: 0x0001018e <call_me+66>: 03 26 84 fe lw a2,-24(s0) 0x00010192 <call_me+70>: 83 25 c4 fe lw a1,-20(s0) 0x00010196 <call_me+74>: 61 65 lui a0,0x18 0x00010198 <call_me+76>: 13 05 85 6a addi a0,a0,1704 0x0001019c <call_me+80>: f1 22 jal 0x10368 <printf> End of assembler dump. Obviously, this patch is a potentially significant change to the behaviour or /r. I could have added /b with the new behaviour and left /r alone. However, personally, I feel the new behaviour is significantly better than the old, hence, I made /r be what I consider the "better" behaviour. The reason I prefer the new behaviour is that, when I use /r, I almost always want to manually decode the instruction for some reason, and having the bytes displayed in "instruction order" rather than memory order, just makes this easier. The 'record instruction-history' command also takes a /r modifier, and has been modified in the same way as disassemble; /r gets the new behaviour, and /b has been added to retain the old behaviour. Finally, the MI command -data-disassemble, is unchanged in behaviour, this command now requests the raw bytes of the instruction, which is equivalent to the /b modifier. This means that the MI output will remain backward compatible.
This commit is contained in:
12
gdb/NEWS
12
gdb/NEWS
@ -59,6 +59,18 @@
|
|||||||
|
|
||||||
* gdb now supports zstd compressed debug sections (ELFCOMPRESS_ZSTD) for ELF.
|
* gdb now supports zstd compressed debug sections (ELFCOMPRESS_ZSTD) for ELF.
|
||||||
|
|
||||||
|
* The format of 'disassemble /r' and 'record instruction-history /r'
|
||||||
|
has changed. The instruction bytes could now be grouped together,
|
||||||
|
and displayed in the endianness of the instruction. This is the
|
||||||
|
same layout as used by GNU objdump when disassembling.
|
||||||
|
|
||||||
|
There is now 'disassemble /b' and 'record instruction-history /b'
|
||||||
|
which will always display the instructions bytes one at a time in
|
||||||
|
memory order, that is, the byte at the lowest address first.
|
||||||
|
|
||||||
|
For both /r and /b GDB is now better at using whitespace in order to
|
||||||
|
align the disassembled instruction text.
|
||||||
|
|
||||||
* New commands
|
* New commands
|
||||||
|
|
||||||
maintenance set ignore-prologue-end-flag on|off
|
maintenance set ignore-prologue-end-flag on|off
|
||||||
|
@ -1508,6 +1508,9 @@ disassemble_current_function (gdb_disassembly_flags flags)
|
|||||||
|
|
||||||
A /r modifier will include raw instructions in hex with the assembly.
|
A /r modifier will include raw instructions in hex with the assembly.
|
||||||
|
|
||||||
|
A /b modifier is similar to /r except the instruction bytes are printed
|
||||||
|
as separate bytes with no grouping, or endian switching.
|
||||||
|
|
||||||
A /s modifier will include source code with the assembly, like /m, with
|
A /s modifier will include source code with the assembly, like /m, with
|
||||||
two important differences:
|
two important differences:
|
||||||
1) The output is still in pc address order.
|
1) The output is still in pc address order.
|
||||||
@ -1546,6 +1549,9 @@ disassemble_command (const char *arg, int from_tty)
|
|||||||
case 'r':
|
case 'r':
|
||||||
flags |= DISASSEMBLY_RAW_INSN;
|
flags |= DISASSEMBLY_RAW_INSN;
|
||||||
break;
|
break;
|
||||||
|
case 'b':
|
||||||
|
flags |= DISASSEMBLY_RAW_BYTES;
|
||||||
|
break;
|
||||||
case 's':
|
case 's':
|
||||||
flags |= DISASSEMBLY_SOURCE;
|
flags |= DISASSEMBLY_SOURCE;
|
||||||
break;
|
break;
|
||||||
|
@ -33,6 +33,7 @@ enum gdb_disassembly_flag
|
|||||||
DISASSEMBLY_OMIT_PC = (0x1 << 4),
|
DISASSEMBLY_OMIT_PC = (0x1 << 4),
|
||||||
DISASSEMBLY_SOURCE = (0x1 << 5),
|
DISASSEMBLY_SOURCE = (0x1 << 5),
|
||||||
DISASSEMBLY_SPECULATIVE = (0x1 << 6),
|
DISASSEMBLY_SPECULATIVE = (0x1 << 6),
|
||||||
|
DISASSEMBLY_RAW_BYTES = (0x1 << 7),
|
||||||
};
|
};
|
||||||
DEF_ENUM_FLAGS_TYPE (enum gdb_disassembly_flag, gdb_disassembly_flags);
|
DEF_ENUM_FLAGS_TYPE (enum gdb_disassembly_flag, gdb_disassembly_flags);
|
||||||
|
|
||||||
|
43
gdb/disasm.c
43
gdb/disasm.c
@ -457,7 +457,7 @@ gdb_pretty_print_disassembler::pretty_print_insn (const struct disasm_insn *insn
|
|||||||
throw ex;
|
throw ex;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & DISASSEMBLY_RAW_INSN)
|
if ((flags & (DISASSEMBLY_RAW_INSN | DISASSEMBLY_RAW_BYTES)) != 0)
|
||||||
{
|
{
|
||||||
/* Build the opcodes using a temporary stream so we can
|
/* Build the opcodes using a temporary stream so we can
|
||||||
write them out in a single go for the MI. */
|
write them out in a single go for the MI. */
|
||||||
@ -467,14 +467,51 @@ gdb_pretty_print_disassembler::pretty_print_insn (const struct disasm_insn *insn
|
|||||||
m_opcode_data.resize (size);
|
m_opcode_data.resize (size);
|
||||||
read_code (pc, m_opcode_data.data (), size);
|
read_code (pc, m_opcode_data.data (), size);
|
||||||
|
|
||||||
for (int i = 0; i < size; ++i)
|
/* The disassembler provides information about the best way to
|
||||||
|
display the instruction bytes to the user. We provide some sane
|
||||||
|
defaults in case the disassembler gets it wrong. */
|
||||||
|
const struct disassemble_info *di = m_di.disasm_info ();
|
||||||
|
int bytes_per_line = std::max (di->bytes_per_line, size);
|
||||||
|
int bytes_per_chunk = std::max (di->bytes_per_chunk, 1);
|
||||||
|
|
||||||
|
/* If the user has requested the instruction bytes be displayed
|
||||||
|
byte at a time, then handle that here. Also, if the instruction
|
||||||
|
is not a multiple of the chunk size (which probably indicates a
|
||||||
|
disassembler problem) then avoid that causing display problems
|
||||||
|
by switching to byte at a time mode. */
|
||||||
|
if ((flags & DISASSEMBLY_RAW_BYTES) != 0
|
||||||
|
|| (size % bytes_per_chunk) != 0)
|
||||||
|
bytes_per_chunk = 1;
|
||||||
|
|
||||||
|
/* Print the instruction opcodes bytes, grouped into chunks. */
|
||||||
|
for (int i = 0; i < size; i += bytes_per_chunk)
|
||||||
{
|
{
|
||||||
if (i > 0)
|
if (i > 0)
|
||||||
m_opcode_stb.puts (" ");
|
m_opcode_stb.puts (" ");
|
||||||
m_opcode_stb.printf ("%02x", (unsigned) m_opcode_data[i]);
|
|
||||||
|
if (di->display_endian == BFD_ENDIAN_LITTLE)
|
||||||
|
{
|
||||||
|
for (int k = bytes_per_chunk; k-- != 0; )
|
||||||
|
m_opcode_stb.printf ("%02x", (unsigned) m_opcode_data[i + k]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int k = 0; k < bytes_per_chunk; k++)
|
||||||
|
m_opcode_stb.printf ("%02x", (unsigned) m_opcode_data[i + k]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Calculate required padding. */
|
||||||
|
int nspaces = 0;
|
||||||
|
for (int i = size; i < bytes_per_line; i += bytes_per_chunk)
|
||||||
|
{
|
||||||
|
if (i > size)
|
||||||
|
nspaces++;
|
||||||
|
nspaces += bytes_per_chunk * 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_uiout->field_stream ("opcodes", m_opcode_stb);
|
m_uiout->field_stream ("opcodes", m_opcode_stb);
|
||||||
|
m_uiout->spaces (nspaces);
|
||||||
m_uiout->text ("\t");
|
m_uiout->text ("\t");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7945,7 +7945,10 @@ are printed in execution order.
|
|||||||
|
|
||||||
It can also print mixed source+disassembly if you specify the the
|
It can also print mixed source+disassembly if you specify the the
|
||||||
@code{/m} or @code{/s} modifier, and print the raw instructions in hex
|
@code{/m} or @code{/s} modifier, and print the raw instructions in hex
|
||||||
as well as in symbolic form by specifying the @code{/r} modifier.
|
as well as in symbolic form by specifying the @code{/r} or @code{/b}
|
||||||
|
modifier. The behaviour of the @code{/m}, @code{/s}, @code{/r}, and
|
||||||
|
@code{/b} modifiers are the same as for the @kbd{disassemble} command
|
||||||
|
(@pxref{disassemble,,@kbd{disassemble}}).
|
||||||
|
|
||||||
The current position marker is printed for the instruction at the
|
The current position marker is printed for the instruction at the
|
||||||
current program counter value. This instruction can appear multiple
|
current program counter value. This instruction can appear multiple
|
||||||
@ -9859,6 +9862,7 @@ After @code{info line}, using @code{info line} again without
|
|||||||
specifying a location will display information about the next source
|
specifying a location will display information about the next source
|
||||||
line.
|
line.
|
||||||
|
|
||||||
|
@anchor{disassemble}
|
||||||
@table @code
|
@table @code
|
||||||
@kindex disassemble
|
@kindex disassemble
|
||||||
@cindex assembly instructions
|
@cindex assembly instructions
|
||||||
@ -9869,16 +9873,17 @@ line.
|
|||||||
@itemx disassemble /m
|
@itemx disassemble /m
|
||||||
@itemx disassemble /s
|
@itemx disassemble /s
|
||||||
@itemx disassemble /r
|
@itemx disassemble /r
|
||||||
|
@itemx disassemble /b
|
||||||
This specialized command dumps a range of memory as machine
|
This specialized command dumps a range of memory as machine
|
||||||
instructions. It can also print mixed source+disassembly by specifying
|
instructions. It can also print mixed source+disassembly by specifying
|
||||||
the @code{/m} or @code{/s} modifier and print the raw instructions in hex
|
the @code{/m} or @code{/s} modifier and print the raw instructions in
|
||||||
as well as in symbolic form by specifying the @code{/r} modifier.
|
hex as well as in symbolic form by specifying the @code{/r} or @code{/b}
|
||||||
The default memory range is the function surrounding the
|
modifier. The default memory range is the function surrounding the
|
||||||
program counter of the selected frame. A single argument to this
|
program counter of the selected frame. A single argument to this
|
||||||
command is a program counter value; @value{GDBN} dumps the function
|
command is a program counter value; @value{GDBN} dumps the function
|
||||||
surrounding this value. When two arguments are given, they should
|
surrounding this value. When two arguments are given, they should be
|
||||||
be separated by a comma, possibly surrounded by whitespace. The
|
separated by a comma, possibly surrounded by whitespace. The arguments
|
||||||
arguments specify a range of addresses to dump, in one of two forms:
|
specify a range of addresses to dump, in one of two forms:
|
||||||
|
|
||||||
@table @code
|
@table @code
|
||||||
@item @var{start},@var{end}
|
@item @var{start},@var{end}
|
||||||
@ -9916,6 +9921,35 @@ Dump of assembler code from 0x32c4 to 0x32e4:
|
|||||||
End of assembler dump.
|
End of assembler dump.
|
||||||
@end smallexample
|
@end smallexample
|
||||||
|
|
||||||
|
The following two examples are for RISC-V, and demonstrates the
|
||||||
|
difference between the @code{/r} and @code{/b} modifiers. First with
|
||||||
|
@code{/b}, the bytes of the instruction are printed, in hex, in memory
|
||||||
|
order:
|
||||||
|
|
||||||
|
@smallexample
|
||||||
|
(@value{GDBP}) disassemble /b 0x00010150,0x0001015c
|
||||||
|
Dump of assembler code from 0x10150 to 0x1015c:
|
||||||
|
0x00010150 <call_me+4>: 22 dc sw s0,56(sp)
|
||||||
|
0x00010152 <call_me+6>: 80 00 addi s0,sp,64
|
||||||
|
0x00010154 <call_me+8>: 23 26 a4 fe sw a0,-20(s0)
|
||||||
|
0x00010158 <call_me+12>: 23 24 b4 fe sw a1,-24(s0)
|
||||||
|
End of assembler dump.
|
||||||
|
@end smallexample
|
||||||
|
|
||||||
|
In contrast, with @code{/r} the bytes of the instruction are displayed
|
||||||
|
in the instruction order, for RISC-V this means that the bytes have been
|
||||||
|
swapped to little-endian order:
|
||||||
|
|
||||||
|
@smallexample
|
||||||
|
(@value{GDBP}) disassemble /r 0x00010150,0x0001015c
|
||||||
|
Dump of assembler code from 0x10150 to 0x1015c:
|
||||||
|
0x00010150 <call_me+4>: dc22 sw s0,56(sp)
|
||||||
|
0x00010152 <call_me+6>: 0080 addi s0,sp,64
|
||||||
|
0x00010154 <call_me+8>: fea42623 sw a0,-20(s0)
|
||||||
|
0x00010158 <call_me+12>: feb42423 sw a1,-24(s0)
|
||||||
|
End of assembler dump.
|
||||||
|
@end smallexample
|
||||||
|
|
||||||
Here is an example showing mixed source+assembly for Intel x86
|
Here is an example showing mixed source+assembly for Intel x86
|
||||||
with @code{/m} or @code{/s}, when the program is stopped just after
|
with @code{/m} or @code{/s}, when the program is stopped just after
|
||||||
function prologue in a non-optimized function with no inline code.
|
function prologue in a non-optimized function with no inline code.
|
||||||
|
@ -165,16 +165,16 @@ mi_cmd_disassemble (const char *command, char **argv, int argc)
|
|||||||
disasm_flags |= DISASSEMBLY_SOURCE_DEPRECATED;
|
disasm_flags |= DISASSEMBLY_SOURCE_DEPRECATED;
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
disasm_flags |= DISASSEMBLY_RAW_INSN;
|
disasm_flags |= DISASSEMBLY_RAW_BYTES;
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
disasm_flags |= DISASSEMBLY_SOURCE_DEPRECATED | DISASSEMBLY_RAW_INSN;
|
disasm_flags |= DISASSEMBLY_SOURCE_DEPRECATED | DISASSEMBLY_RAW_BYTES;
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
disasm_flags |= DISASSEMBLY_SOURCE;
|
disasm_flags |= DISASSEMBLY_SOURCE;
|
||||||
break;
|
break;
|
||||||
case 5:
|
case 5:
|
||||||
disasm_flags |= DISASSEMBLY_SOURCE | DISASSEMBLY_RAW_INSN;
|
disasm_flags |= DISASSEMBLY_SOURCE | DISASSEMBLY_RAW_BYTES;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
gdb_assert_not_reached ("bad disassembly mode");
|
gdb_assert_not_reached ("bad disassembly mode");
|
||||||
|
@ -494,6 +494,9 @@ get_insn_history_modifiers (const char **arg)
|
|||||||
case 'r':
|
case 'r':
|
||||||
modifiers |= DISASSEMBLY_RAW_INSN;
|
modifiers |= DISASSEMBLY_RAW_INSN;
|
||||||
break;
|
break;
|
||||||
|
case 'b':
|
||||||
|
modifiers |= DISASSEMBLY_RAW_BYTES;
|
||||||
|
break;
|
||||||
case 'f':
|
case 'f':
|
||||||
modifiers |= DISASSEMBLY_OMIT_FNAME;
|
modifiers |= DISASSEMBLY_OMIT_FNAME;
|
||||||
break;
|
break;
|
||||||
|
@ -245,12 +245,12 @@ proc test_disassembly_opcode_format {} {
|
|||||||
# then disassemble using the MI command.
|
# then disassemble using the MI command.
|
||||||
set longest_insn_bytes ""
|
set longest_insn_bytes ""
|
||||||
set longest_insn_addr ""
|
set longest_insn_addr ""
|
||||||
gdb_test_multiple "disassemble /r main" "" {
|
gdb_test_multiple "disassemble /b main" "" {
|
||||||
-re "^disassemble /r main\r\n" {
|
-re "^disassemble /b main\r\n" {
|
||||||
exp_continue
|
exp_continue
|
||||||
}
|
}
|
||||||
|
|
||||||
-re "^&\"disassemble /r main.n\"\r\n" {
|
-re "^&\"disassemble /b main.n\"\r\n" {
|
||||||
exp_continue
|
exp_continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user