gas: rework handling of backslashes in quoted symbol names

Strange effects can result from the present handling, e.g.:

.if 1
"backslash\\":
.endif

yields first (correctly) "missing closing `"'" but then also "invalid
character '\' in mnemonic" and further "end of file inside conditional".
Symbol names ending in \ are in principle not expressible with that
scheme.

Instead of recording whether a backslash was seen, inspect the
subsequent character right away. Only accept \\ (meaning a single
backslash in the resulting symbol name) and \" (meaning an embedded
double quote in the resulting symbol name) for now, warning about any
other combination.

While perhaps not necessary immediately, also permit concatenated
strings to form a symbol name. This may become useful if, going forward,
we want to support \<octal> or \x<hex> sequences, where closing
and re-opening quotes can be useful to delimit such sequences.

The ELF "Multibyte symbol names" test gets switched away from using
.set, as that would now also mean excluding nios2 and pru. By using
.equiv instead, even the existing #notarget can be dropped. (For h8300
the .section directive additionally needs attributes specified, to avoid
a target specific warning.)
This commit is contained in:
Jan Beulich
2022-01-04 10:05:17 +01:00
parent 79541a6d92
commit 5ed4d49d10
5 changed files with 69 additions and 15 deletions

View File

@ -2400,18 +2400,52 @@ get_symbol_name (char ** ilp_return)
}
else if (c == '"')
{
bool backslash_seen;
char *dst = input_line_pointer;
* ilp_return = input_line_pointer;
do
for (;;)
{
backslash_seen = c == '\\';
c = * input_line_pointer ++;
}
while (c != 0 && (c != '"' || backslash_seen));
c = *input_line_pointer++;
if (c == 0)
as_warn (_("missing closing '\"'"));
if (c == 0)
{
as_warn (_("missing closing '\"'"));
break;
}
if (c == '"')
{
char *ilp_save = input_line_pointer;
SKIP_WHITESPACE ();
if (*input_line_pointer == '"')
{
++input_line_pointer;
continue;
}
input_line_pointer = ilp_save;
break;
}
if (c == '\\')
switch (*input_line_pointer)
{
case '"':
case '\\':
c = *input_line_pointer++;
break;
default:
if (c != 0)
as_warn (_("'\\%c' in quoted symbol name; "
"behavior may change in the future"),
*input_line_pointer);
break;
}
*dst++ = c;
}
*dst = 0;
}
*--input_line_pointer = 0;
return c;

View File

@ -1,6 +1,13 @@
#nm: --extern-only
#nm: --extern-only --numeric-sort
#name: quoted symbol names
# No quoted strings handling (TC_STRING_ESCAPES set to 0):
#notarget: powerpc*-*-aix* powerpc*-*-beos* powerpc-*-macos* rs6000-*-*
# Explicitly no escapes in quoted strings:
#notarget: z80-*-*
#...
0+00 T test-a
0+01 T back\\slash
0+02 T back"slash
0+03 T backslash\\
0+04 T backslash"

View File

@ -1,4 +1,19 @@
.text
.globl "test-a"
"test-a":
.word 0
.byte 0
.globl "back\\slash"
"back\\slash":
.byte 0
.globl "back\"slash"
"back\"slash":
.byte 0
.globl "backslash\\"
"backslash\\":
.byte 0
.globl "backslash\""
"backslash\"":
.byte 0
/* .globl "back""slash" */
"back""slash":
.byte 0

View File

@ -1,7 +1,5 @@
#readelf: -S -s -p .strtab
#name: Multibyte symbol names
# The following targets use an unusual .set syntax...
#notarget: alpha*-*-* h8300-*-*
#...
Section Headers:

View File

@ -1,5 +1,5 @@
.section "sec\xa5\xc2tion"
.section "sec\xa5\xc2tion", "a"
.set "sy\xa5\xc2mbol", .
.equiv "sy\xa5\xc2mbol", .
.string8 "str\xa5\xc2ing"