diff --git a/ld/ldgram.y b/ld/ldgram.y index 24979deebbe..1f6c44a073c 100644 --- a/ld/ldgram.y +++ b/ld/ldgram.y @@ -421,21 +421,11 @@ statement_anywhere: lang_add_assignment (exp_assert ($4, $6)); } ; -/* The '*' and '?' cases are there because the lexer returns them as - separate tokens rather than as NAME. */ wildcard_name: NAME { $$ = $1; } - | '*' - { - $$ = "*"; - } - | '?' - { - $$ = "?"; - } ; wildcard_maybe_exclude: diff --git a/ld/ldlex.l b/ld/ldlex.l index b0861d78e49..6aeba6de656 100644 --- a/ld/ldlex.l +++ b/ld/ldlex.l @@ -192,132 +192,155 @@ V_IDENTIFIER [*?.$_a-zA-Z\[\]\-\!\^\\]([*?.$_a-zA-Z0-9\[\]\-\!\^\\]|::)* } return INT; } -"]" { RTOKEN(']');} -"[" { RTOKEN('[');} -"<<=" { RTOKEN(LSHIFTEQ);} -">>=" { RTOKEN(RSHIFTEQ);} -"||" { RTOKEN(OROR);} -"==" { RTOKEN(EQ);} -"!=" { RTOKEN(NE);} -">=" { RTOKEN(GE);} -"<=" { RTOKEN(LE);} -"<<" { RTOKEN(LSHIFT);} -">>" { RTOKEN(RSHIFT);} -"+=" { RTOKEN(PLUSEQ);} -"-=" { RTOKEN(MINUSEQ);} -"*=" { RTOKEN(MULTEQ);} -"/=" { RTOKEN(DIVEQ);} -"&=" { RTOKEN(ANDEQ);} -"|=" { RTOKEN(OREQ);} -"&&" { RTOKEN(ANDAND);} -">" { RTOKEN('>');} -"," { RTOKEN(',');} -"&" { RTOKEN('&');} -"|" { RTOKEN('|');} -"~" { RTOKEN('~');} -"!" { RTOKEN('!');} -"?" { RTOKEN('?');} -"*" { RTOKEN('*');} -"+" { RTOKEN('+');} -"-" { RTOKEN('-');} -"/" { RTOKEN('/');} -"%" { RTOKEN('%');} -"<" { RTOKEN('<');} -"=" { RTOKEN('=');} + + /* Some tokens that only appear in expressions must be enabled for + states other than EXPRESSION, since parser lookahead means they + must be recognised before the parser switches the lexer out of + SCRIPT or WILD state into EXPRESSION state. + + This sort of thing happens for example with NAME in ldgram.y + "section" rule, which is immediately followed by ldlex_expression. + However, if you follow the grammar from "sec_or_group_p1" you see + "assignment" appearing in "statement_anywhere". Now, + "assignment" also has NAME as its first token, just like + "section". 
So the parser can't know whether it is in the + "section" or the "assignment" rule until it has scanned the next + token to find an assignment operator. Thus the next token after + NAME in the "section" rule may be lexed before the lexer is + switched to EXPRESSION state, and there are quite a number of + optional components. The first token of each of those components, + as well as the assignment operators, must be lexable in SCRIPT + state. In fact, due to "opt_exp_with_type", + anything that can appear on the left-hand side of "exp" might + need to be lexed in SCRIPT state. + + MRI mode tends to cover everything in MRI scripts. + */ +"]" { RTOKEN(']'); } +"[" { RTOKEN('['); } +"<<=" { RTOKEN(LSHIFTEQ); } +">>=" { RTOKEN(RSHIFTEQ); } +"||" { RTOKEN(OROR); } +"==" { RTOKEN(EQ); } +"!=" { RTOKEN(NE); } +">=" { RTOKEN(GE); } +"<=" { RTOKEN(LE); } +"<<" { RTOKEN(LSHIFT); } +">>" { RTOKEN(RSHIFT); } +"+=" { RTOKEN(PLUSEQ); } +"-=" { RTOKEN(MINUSEQ); } +"*=" { RTOKEN(MULTEQ); } +"/=" { RTOKEN(DIVEQ); } +"&=" { RTOKEN(ANDEQ); } +"|=" { RTOKEN(OREQ); } +"&&" { RTOKEN(ANDAND); } +">" { RTOKEN('>'); } +"," { RTOKEN(','); } +"&" { RTOKEN('&'); } +"|" { RTOKEN('|'); } +"~" { RTOKEN('~'); } +"!" { RTOKEN('!'); } +"?" { RTOKEN('?'); } +"*" { RTOKEN('*'); } +"+" { RTOKEN('+'); } +"-" { RTOKEN('-'); } +"/" { RTOKEN('/'); } +"%" { RTOKEN('%'); } +"<" { RTOKEN('<'); } +"=" { RTOKEN('='); } "}" { RTOKEN('}'); } "{" { RTOKEN('{'); } -")" { RTOKEN(')');} -"(" { RTOKEN('(');} +")" { RTOKEN(')'); } +"(" { RTOKEN('('); } ":" { RTOKEN(':'); } -";" { RTOKEN(';');} -