mirror of
https://github.com/espressif/binutils-gdb.git
synced 2025-10-18 05:12:33 +08:00
Add --unicode option to control how unicode characters are handled by display tools.
* nm.c: Add --unicode option to control how unicode characters are handled. * objdump.c: Likewise. * readelf.c: Likewise. * strings.c: Likewise. * binutils.texi: Document the new feature. * NEWS: Document the new feature. * testsuite/binutils-all/unicode.exp: New file. * testsuite/binutils-all/nm.hex.unicode * testsuite/binutils-all/strings.escape.unicode * testsuite/binutils-all/objdump.highlight.unicode * testsuite/binutils-all/readelf.invalid.unicode
This commit is contained in:
228
binutils/nm.c
228
binutils/nm.c
@ -38,6 +38,11 @@
|
||||
#include "bucomm.h"
|
||||
#include "plugin-api.h"
|
||||
#include "plugin.h"
|
||||
#include "safe-ctype.h"
|
||||
|
||||
/* streq (A, B) is non-zero when strcmp reports the strings A and B as
   equal.  Only defined here when no earlier definition is in effect.  */
#ifndef streq
|
||||
#define streq(a,b) (strcmp ((a),(b)) == 0)
|
||||
#endif
|
||||
|
||||
/* When sorting by size, we use this structure to hold the size and a
|
||||
pointer to the minisymbol. */
|
||||
@ -216,6 +221,18 @@ static const char *plugin_target = NULL;
|
||||
static bfd *lineno_cache_bfd;
|
||||
static bfd *lineno_cache_rel_bfd;
|
||||
|
||||
/* The ways in which UTF-8 encoded multibyte sequences found in symbol
   names can be displayed.  Selected by the -U/--unicode option.  */
typedef enum unicode_display_type
|
||||
{
|
||||
unicode_default = 0, /* No UTF-8 processing; bytes are passed through.  */
|
||||
unicode_locale, /* Copy the bytes of the sequence to the output as is.  */
|
||||
unicode_escape, /* Display as a \u escape followed by hex digits.  */
|
||||
unicode_hex, /* Display the raw bytes as <0x...>.  */
|
||||
unicode_highlight, /* As unicode_escape, but coloured red when stdout is a tty.  */
|
||||
unicode_invalid /* Display the raw bytes as {0x...}.  */
|
||||
} unicode_display_type;
|
||||
|
||||
/* The display mode currently in effect.  */
static unicode_display_type unicode_display = unicode_default;
|
||||
|
||||
enum long_option_values
|
||||
{
|
||||
OPTION_TARGET = 200,
|
||||
@ -260,6 +277,7 @@ static struct option long_options[] =
|
||||
{"target", required_argument, 0, OPTION_TARGET},
|
||||
{"defined-only", no_argument, &defined_only, 1},
|
||||
{"undefined-only", no_argument, &undefined_only, 1},
|
||||
{"unicode", required_argument, NULL, 'U'},
|
||||
{"version", no_argument, &show_version, 1},
|
||||
{"with-symbol-versions", no_argument, &with_symbol_versions, 1},
|
||||
{"without-symbol-versions", no_argument, &with_symbol_versions, 0},
|
||||
@ -313,6 +331,8 @@ usage (FILE *stream, int status)
|
||||
-t, --radix=RADIX Use RADIX for printing symbol values\n\
|
||||
--target=BFDNAME Specify the target object format as BFDNAME\n\
|
||||
-u, --undefined-only Display only undefined symbols\n\
|
||||
-U {d|s|i|x|e|h} Specify how to treat UTF-8 encoded unicode characters\n\
|
||||
--unicode={default|show|invalid|hex|escape|highlight}\n\
|
||||
--with-symbol-versions Display version strings after symbol names\n\
|
||||
-X 32_64 (ignored)\n\
|
||||
@FILE Read options from FILE\n\
|
||||
@ -432,6 +452,187 @@ get_coff_symbol_type (const struct internal_syment *sym)
|
||||
return bufp;
|
||||
}
|
||||
|
||||
/* Convert a potential UTF-8 encoded sequence in IN into characters in OUT.
|
||||
The conversion format is controlled by the unicode_display variable.
|
||||
Returns the number of characters added to OUT.
|
||||
Returns the number of bytes consumed from IN in CONSUMED.
|
||||
Always consumes at least one byte and displays at least one character. */
|
||||
|
||||
static unsigned int
|
||||
display_utf8 (const unsigned char * in, char * out, unsigned int * consumed)
|
||||
{
|
||||
char * orig_out = out;
|
||||
unsigned int nchars = 0;
|
||||
unsigned int j;
|
||||
|
||||
if (unicode_display == unicode_default)
|
||||
goto invalid;
|
||||
|
||||
if (in[0] < 0xc0)
|
||||
goto invalid;
|
||||
|
||||
if ((in[1] & 0xc0) != 0x80)
|
||||
goto invalid;
|
||||
|
||||
if ((in[0] & 0x20) == 0)
|
||||
{
|
||||
nchars = 2;
|
||||
goto valid;
|
||||
}
|
||||
|
||||
if ((in[2] & 0xc0) != 0x80)
|
||||
goto invalid;
|
||||
|
||||
if ((in[0] & 0x10) == 0)
|
||||
{
|
||||
nchars = 3;
|
||||
goto valid;
|
||||
}
|
||||
|
||||
if ((in[3] & 0xc0) != 0x80)
|
||||
goto invalid;
|
||||
|
||||
nchars = 4;
|
||||
|
||||
valid:
|
||||
switch (unicode_display)
|
||||
{
|
||||
case unicode_locale:
|
||||
/* Copy the bytes into the output buffer as is. */
|
||||
memcpy (out, in, nchars);
|
||||
out += nchars;
|
||||
break;
|
||||
|
||||
case unicode_invalid:
|
||||
case unicode_hex:
|
||||
out += sprintf (out, "%c", unicode_display == unicode_hex ? '<' : '{');
|
||||
out += sprintf (out, "0x");
|
||||
for (j = 0; j < nchars; j++)
|
||||
out += sprintf (out, "%02x", in [j]);
|
||||
out += sprintf (out, "%c", unicode_display == unicode_hex ? '>' : '}');
|
||||
break;
|
||||
|
||||
case unicode_highlight:
|
||||
if (isatty (1))
|
||||
out += sprintf (out, "\x1B[31;47m"); /* Red. */
|
||||
/* Fall through. */
|
||||
case unicode_escape:
|
||||
switch (nchars)
|
||||
{
|
||||
case 2:
|
||||
out += sprintf (out, "\\u%02x%02x",
|
||||
((in[0] & 0x1c) >> 2),
|
||||
((in[0] & 0x03) << 6) | (in[1] & 0x3f));
|
||||
break;
|
||||
|
||||
case 3:
|
||||
out += sprintf (out, "\\u%02x%02x",
|
||||
((in[0] & 0x0f) << 4) | ((in[1] & 0x3c) >> 2),
|
||||
((in[1] & 0x03) << 6) | ((in[2] & 0x3f)));
|
||||
break;
|
||||
|
||||
case 4:
|
||||
out += sprintf (out, "\\u%02x%02x%02x",
|
||||
((in[0] & 0x07) << 6) | ((in[1] & 0x3c) >> 2),
|
||||
((in[1] & 0x03) << 6) | ((in[2] & 0x3c) >> 2),
|
||||
((in[2] & 0x03) << 6) | ((in[3] & 0x3f)));
|
||||
break;
|
||||
default:
|
||||
/* URG. */
|
||||
break;
|
||||
}
|
||||
|
||||
if (unicode_display == unicode_highlight && isatty (1))
|
||||
out += sprintf (out, "\033[0m"); /* Default colour. */
|
||||
break;
|
||||
|
||||
default:
|
||||
/* URG */
|
||||
break;
|
||||
}
|
||||
|
||||
* consumed = nchars;
|
||||
return out - orig_out;
|
||||
|
||||
invalid:
|
||||
/* Not a valid UTF-8 sequence. */
|
||||
*out = *in;
|
||||
* consumed = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Convert any UTF-8 encoded characters in NAME into the form specified by
|
||||
unicode_display. Also converts control characters. Returns a static
|
||||
buffer if conversion was necessary.
|
||||
Code stolen from objdump.c:sanitize_string(). */
|
||||
|
||||
static const char *
|
||||
convert_utf8 (const char * in)
|
||||
{
|
||||
static char * buffer = NULL;
|
||||
static size_t buffer_len = 0;
|
||||
const char * original = in;
|
||||
char * out;
|
||||
|
||||
/* Paranoia. */
|
||||
if (in == NULL)
|
||||
return "";
|
||||
|
||||
/* See if any conversion is necessary.
|
||||
In the majority of cases it will not be needed. */
|
||||
do
|
||||
{
|
||||
unsigned char c = *in++;
|
||||
|
||||
if (c == 0)
|
||||
return original;
|
||||
|
||||
if (ISCNTRL (c))
|
||||
break;
|
||||
|
||||
if (unicode_display != unicode_default && c >= 0xc0)
|
||||
break;
|
||||
}
|
||||
while (1);
|
||||
|
||||
/* Copy the input, translating as needed. */
|
||||
in = original;
|
||||
if (buffer_len < (strlen (in) * 9))
|
||||
{
|
||||
free ((void *) buffer);
|
||||
buffer_len = strlen (in) * 9;
|
||||
buffer = xmalloc (buffer_len + 1);
|
||||
}
|
||||
|
||||
out = buffer;
|
||||
do
|
||||
{
|
||||
unsigned char c = *in++;
|
||||
|
||||
if (c == 0)
|
||||
break;
|
||||
|
||||
if (ISCNTRL (c))
|
||||
{
|
||||
*out++ = '^';
|
||||
*out++ = c + 0x40;
|
||||
}
|
||||
else if (unicode_display != unicode_default && c >= 0xc0)
|
||||
{
|
||||
unsigned int num_consumed;
|
||||
|
||||
out += display_utf8 ((const unsigned char *)(in - 1), out, & num_consumed);
|
||||
in += num_consumed - 1;
|
||||
}
|
||||
else
|
||||
*out++ = c;
|
||||
}
|
||||
while (1);
|
||||
|
||||
*out = 0;
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/* Print symbol name NAME, read from ABFD, with printf format FORM,
|
||||
demangling it if requested. */
|
||||
|
||||
@ -444,6 +645,7 @@ print_symname (const char *form, struct extended_symbol_info *info,
|
||||
|
||||
if (name == NULL)
|
||||
name = info->sinfo->name;
|
||||
|
||||
if (!with_symbol_versions
|
||||
&& bfd_get_flavour (abfd) == bfd_target_elf_flavour)
|
||||
{
|
||||
@ -451,6 +653,7 @@ print_symname (const char *form, struct extended_symbol_info *info,
|
||||
if (atver)
|
||||
*atver = 0;
|
||||
}
|
||||
|
||||
if (do_demangle && *name)
|
||||
{
|
||||
alloc = bfd_demangle (abfd, name, demangle_flags);
|
||||
@ -458,6 +661,11 @@ print_symname (const char *form, struct extended_symbol_info *info,
|
||||
name = alloc;
|
||||
}
|
||||
|
||||
if (unicode_display != unicode_default)
|
||||
{
|
||||
name = convert_utf8 (name);
|
||||
}
|
||||
|
||||
if (info != NULL && info->elfinfo && with_symbol_versions)
|
||||
{
|
||||
const char *version_string;
|
||||
@ -1808,7 +2016,7 @@ main (int argc, char **argv)
|
||||
fatal (_("fatal error: libbfd ABI mismatch"));
|
||||
set_default_bfd_target ();
|
||||
|
||||
while ((c = getopt_long (argc, argv, "aABCDef:gHhjJlnopPrSst:uvVvX:",
|
||||
while ((c = getopt_long (argc, argv, "aABCDef:gHhjJlnopPrSst:uU:vVvX:",
|
||||
long_options, (int *) 0)) != EOF)
|
||||
{
|
||||
switch (c)
|
||||
@ -1901,6 +2109,24 @@ main (int argc, char **argv)
|
||||
case 'u':
|
||||
undefined_only = 1;
|
||||
break;
|
||||
|
||||
case 'U':
|
||||
if (streq (optarg, "default") || streq (optarg, "d"))
|
||||
unicode_display = unicode_default;
|
||||
else if (streq (optarg, "locale") || streq (optarg, "l"))
|
||||
unicode_display = unicode_locale;
|
||||
else if (streq (optarg, "escape") || streq (optarg, "e"))
|
||||
unicode_display = unicode_escape;
|
||||
else if (streq (optarg, "invalid") || streq (optarg, "i"))
|
||||
unicode_display = unicode_invalid;
|
||||
else if (streq (optarg, "hex") || streq (optarg, "x"))
|
||||
unicode_display = unicode_hex;
|
||||
else if (streq (optarg, "highlight") || streq (optarg, "h"))
|
||||
unicode_display = unicode_highlight;
|
||||
else
|
||||
fatal (_("invalid argument to -U/--unicode: %s"), optarg);
|
||||
break;
|
||||
|
||||
case 'V':
|
||||
show_version = 1;
|
||||
break;
|
||||
|
Reference in New Issue
Block a user