mirror of
https://github.com/espressif/binutils-gdb.git
synced 2025-06-05 23:26:51 +08:00
Allow non-ASCII characters in Rust identifiers
Rust 1.53 (quite a while ago now) ungated the support for non-ASCII identifiers. This didn't work in gdb. This is PR rust/20166. This patch fixes the problem by allowing non-ASCII characters to be considered as identifier components. It seemed simplest to just pass them through -- doing any extra checking didn't seem worthwhile. The new test also verifies that such characters are allowed in strings and character literals as well. The latter also required a bit of work in the lexer. Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=20166
This commit is contained in:
@ -33,6 +33,12 @@
|
||||
|
||||
using namespace expr;
|
||||
|
||||
#if WORDS_BIGENDIAN
|
||||
#define UTF32 "UTF-32BE"
|
||||
#else
|
||||
#define UTF32 "UTF-32LE"
|
||||
#endif
|
||||
|
||||
/* A regular expression for matching Rust numbers. This is split up
|
||||
since it is very long and this gives us a way to comment the
|
||||
sections. */
|
||||
@ -577,6 +583,35 @@ rust_parser::lex_escape (int is_byte)
|
||||
return result;
|
||||
}
|
||||
|
||||
/* A helper for lex_character. Search forward for the closing single
|
||||
quote, then convert the bytes from the host charset to UTF-32. */
|
||||
|
||||
static uint32_t
|
||||
lex_multibyte_char (const char *text, int *len)
|
||||
{
|
||||
/* Only look a maximum of 5 bytes for the closing quote. This is
|
||||
the maximum for UTF-8. */
|
||||
int quote;
|
||||
gdb_assert (text[0] != '\'');
|
||||
for (quote = 1; text[quote] != '\0' && text[quote] != '\''; ++quote)
|
||||
;
|
||||
*len = quote;
|
||||
/* The caller will issue an error. */
|
||||
if (text[quote] == '\0')
|
||||
return 0;
|
||||
|
||||
auto_obstack result;
|
||||
convert_between_encodings (host_charset (), UTF32, (const gdb_byte *) text,
|
||||
quote, 1, &result, translit_none);
|
||||
|
||||
int size = obstack_object_size (&result);
|
||||
if (size > 4)
|
||||
error (_("overlong character literal"));
|
||||
uint32_t value;
|
||||
memcpy (&value, obstack_finish (&result), size);
|
||||
return value;
|
||||
}
|
||||
|
||||
/* Lex a character constant. */
|
||||
|
||||
int
|
||||
@ -592,13 +627,15 @@ rust_parser::lex_character ()
|
||||
}
|
||||
gdb_assert (pstate->lexptr[0] == '\'');
|
||||
++pstate->lexptr;
|
||||
/* This should handle UTF-8 here. */
|
||||
if (pstate->lexptr[0] == '\\')
|
||||
if (pstate->lexptr[0] == '\'')
|
||||
error (_("empty character literal"));
|
||||
else if (pstate->lexptr[0] == '\\')
|
||||
value = lex_escape (is_byte);
|
||||
else
|
||||
{
|
||||
value = pstate->lexptr[0] & 0xff;
|
||||
++pstate->lexptr;
|
||||
int len;
|
||||
value = lex_multibyte_char (&pstate->lexptr[0], &len);
|
||||
pstate->lexptr += len;
|
||||
}
|
||||
|
||||
if (pstate->lexptr[0] != '\'')
|
||||
@ -695,16 +732,9 @@ rust_parser::lex_string ()
|
||||
if (is_byte)
|
||||
obstack_1grow (&obstack, value);
|
||||
else
|
||||
{
|
||||
#if WORDS_BIGENDIAN
|
||||
#define UTF32 "UTF-32BE"
|
||||
#else
|
||||
#define UTF32 "UTF-32LE"
|
||||
#endif
|
||||
convert_between_encodings (UTF32, "UTF-8", (gdb_byte *) &value,
|
||||
sizeof (value), sizeof (value),
|
||||
&obstack, translit_none);
|
||||
}
|
||||
convert_between_encodings (UTF32, "UTF-8", (gdb_byte *) &value,
|
||||
sizeof (value), sizeof (value),
|
||||
&obstack, translit_none);
|
||||
}
|
||||
else if (pstate->lexptr[0] == '\0')
|
||||
error (_("Unexpected EOF in string"));
|
||||
@ -746,7 +776,10 @@ rust_identifier_start_p (char c)
|
||||
return ((c >= 'a' && c <= 'z')
|
||||
|| (c >= 'A' && c <= 'Z')
|
||||
|| c == '_'
|
||||
|| c == '$');
|
||||
|| c == '$'
|
||||
/* Allow any non-ASCII character as an identifier. There
|
||||
doesn't seem to be a need to be picky about this. */
|
||||
|| (c & 0x80) != 0);
|
||||
}
|
||||
|
||||
/* Lex an identifier. */
|
||||
@ -772,13 +805,14 @@ rust_parser::lex_identifier ()
|
||||
|
||||
++pstate->lexptr;
|
||||
|
||||
/* For the time being this doesn't handle Unicode rules. Non-ASCII
|
||||
identifiers are gated anyway. */
|
||||
/* Allow any non-ASCII character here. This "handles" UTF-8 by
|
||||
passing it through. */
|
||||
while ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'z')
|
||||
|| (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'Z')
|
||||
|| pstate->lexptr[0] == '_'
|
||||
|| (is_gdb_var && pstate->lexptr[0] == '$')
|
||||
|| (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9'))
|
||||
|| (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')
|
||||
|| (pstate->lexptr[0] & 0x80) != 0)
|
||||
++pstate->lexptr;
|
||||
|
||||
|
||||
|
51
gdb/testsuite/gdb.rust/unicode.exp
Normal file
51
gdb/testsuite/gdb.rust/unicode.exp
Normal file
@ -0,0 +1,51 @@
|
||||
# Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# Test non-ASCII (Unicode) identifiers, strings, and character literals.
|
||||
|
||||
load_lib rust-support.exp
|
||||
if {[skip_rust_tests]} {
|
||||
continue
|
||||
}
|
||||
|
||||
# Non-ASCII identifiers were allowed starting in 1.53.
|
||||
set v [split [rust_compiler_version] .]
|
||||
if {[lindex $v 0] == 1 && [lindex $v 1] < 53} {
|
||||
untested "this test requires rust 1.53 or greater"
|
||||
return -1
|
||||
}
|
||||
|
||||
# Enable basic use of UTF-8. LC_ALL gets reset for each testfile.
|
||||
setenv LC_ALL C.UTF-8
|
||||
|
||||
standard_testfile .rs
|
||||
if {[prepare_for_testing "failed to prepare" $testfile $srcfile {debug rust}]} {
|
||||
return -1
|
||||
}
|
||||
|
||||
set line [gdb_get_line_number "set breakpoint here"]
|
||||
if {![runto ${srcfile}:$line]} {
|
||||
untested "could not run to breakpoint"
|
||||
return -1
|
||||
}
|
||||
|
||||
gdb_test "print 𝕯" " = 98" "print D"
|
||||
gdb_test "print \"𝕯\"" " = \"𝕯\"" "print D in string"
|
||||
# This output is maybe not ideal, but it also isn't incorrect.
|
||||
gdb_test "print '𝕯'" " = 120175 '\\\\u\\\{01d56f\\\}'" \
|
||||
"print D as char"
|
||||
gdb_test "print cç" " = 97" "print cc"
|
||||
|
||||
gdb_test "print 'çc'" "overlong character literal" "print cc as char"
|
26
gdb/testsuite/gdb.rust/unicode.rs
Normal file
26
gdb/testsuite/gdb.rust/unicode.rs
Normal file
@ -0,0 +1,26 @@
|
||||
// Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#![allow(dead_code)]
|
||||
#![allow(unused_variables)]
|
||||
#![allow(unused_assignments)]
|
||||
#![allow(uncommon_codepoints)]
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
// Fixture for unicode.exp: binds two locals whose names contain
// non-ASCII characters.  The test script prints them by name once
// execution stops at the marker comment on the println! line, so the
// names, the values, and that marker must stay in sync with the script.
fn main() {
|
||||
let 𝕯 = 98;
|
||||
let cç = 97;
|
||||
println!("{}, {}", 𝕯, cç); // set breakpoint here
|
||||
}
|
Reference in New Issue
Block a user