Skip to content

Commit

Permalink
lang: parser: Grab entire variable names during lexing
Browse files Browse the repository at this point in the history
This is inelegant, but I strongly believe that allowing dollar signs and their
variable name suffixes to both be first-class citizens is a bad idea.
The dollar sign should initiate a token that is exempt from any other
processing.

Note that it would be possible to construct a regex similar to the one for
IDENTIFIER, that only matches valid variable names. But it will be complex.
It seems more maintainable to just consume all allowed characters, and then
check validity with some simple golang in the parser actions.

Fixes purpleidea#728
  • Loading branch information
ffrank committed Mar 12, 2024
1 parent 6976f5f commit f973818
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 18 deletions.
10 changes: 5 additions & 5 deletions lang/interpret_test/TestAstFunc2/lexer-parser0.txtar
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
-- main.mcl --
import "fmt"
# unfortunately, for now `in` is a reserved keyword, see:
# https://github.com/purpleidea/mgmt/issues/728
$map = 55
$fn = func($in) { # in is a special keyword
# in is a special keyword, but accepted in a variable name
$fn = func($in) {
13
}
test fmt.printf("%d", $fn(0)) {}
func fn($in) { # in is a special keyword
func fn($in) {
42 + $map
}
test fmt.printf("%d", $fn(0)) {}
test fmt.printf("%d", fn(0)) {}
-- OUTPUT --
# err: errLexParse: parser: `syntax error: unexpected IN, expecting MAP_IDENTIFIER or IDENTIFIER` @5:2
Vertex: test[13]
Vertex: test[97]
15 changes: 10 additions & 5 deletions lang/parser/lexer.nex
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,15 @@
lval.str = yylex.Text()
return ARROW
}
/\$[a-z0-9_\.]+/ {
// Lex a whole variable reference as a single token: a dollar sign followed
// by one or more lowercase letters, digits, underscores or dots. The
// pattern is deliberately permissive; it does not reject names with a
// leading/trailing underscore or an empty dotted segment by itself.
yylex.pos(lval) // our pos
// drop the dollar sign
// (TrimLeft strips every leading "$", but the pattern above permits only
// one, and no "$" can occur later in the match, so exactly one character
// is removed here.)
lval.str = strings.TrimLeft(yylex.Text(), "$")
// A dot anywhere in the name selects the dotted token kind, so the grammar
// can distinguish between the two forms.
if strings.Contains(lval.str, ".") {
return DOTTED_VARNAME
}
return UNDOTTED_VARNAME
}
/\./ {
yylex.pos(lval) // our pos
lval.str = yylex.Text()
Expand All @@ -168,11 +177,7 @@
}
return DOT
}
/\$/ {
yylex.pos(lval) // our pos
lval.str = yylex.Text()
return DOLLAR
}

/bool/ {
yylex.pos(lval) // our pos
lval.str = yylex.Text()
Expand Down
1 change: 1 addition & 0 deletions lang/parser/lexparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ const (
ErrParseError = interfaces.Error("parser")
ErrParseSetType = interfaces.Error("can't set return type in parser")
ErrParseResFieldInvalid = interfaces.Error("can't use unknown resource field")
ErrInvalidVariableName = interfaces.Error("invalid variable name")
ErrParseAdditionalEquals = interfaces.Error(errstrParseAdditionalEquals)
ErrParseExpectingComma = interfaces.Error(errstrParseExpectingComma)
)
Expand Down
30 changes: 22 additions & 8 deletions lang/parser/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ func init() {
%token OPEN_BRACK CLOSE_BRACK
%token IF ELSE
%token BOOL STRING INTEGER FLOAT
%token EQUALS DOLLAR
%token EQUALS DOTTED_VARNAME
%token UNDOTTED_VARNAME
%token COMMA COLON SEMICOLON
%token ELVIS DEFAULT ROCKET ARROW DOT
%token BOOL_IDENTIFIER STR_IDENTIFIER INT_IDENTIFIER FLOAT_IDENTIFIER
Expand Down Expand Up @@ -1406,11 +1407,13 @@ undotted_identifier:
}
;
var_identifier:
// eg: $ foo (dollar prefix + identifier)
DOLLAR undotted_identifier
UNDOTTED_VARNAME
{
posLast(yylex, yyDollar) // our pos
$$.str = $2.str // don't include the leading $
if $1.str == "" || strings.HasPrefix($1.str, "_") || strings.HasSuffix($1.str, "_") {
yylex.Error(fmt.Sprintf("%s: %s", ErrInvalidVariableName, $1.str))
}
$$.str = $1.str
}
;
colon_identifier:
Expand Down Expand Up @@ -1439,13 +1442,24 @@ dotted_identifier:
$$.str = $1.str + interfaces.ModuleSep + $3.str
}
;
// there are different ways the lexer/parser might choose to represent this...
dotted_var_identifier:
// eg: $ foo.bar.baz (dollar prefix + dotted identifier)
DOLLAR dotted_identifier
DOTTED_VARNAME
{
posLast(yylex, yyDollar) // our pos
for _, ident := range strings.Split($1.str, ".") {
if ident == "" || strings.HasPrefix(ident, "_") || strings.HasSuffix(ident, "_") {
yylex.Error(fmt.Sprintf("%s: %s", ErrInvalidVariableName, $1.str))
}
}
$$.str = $1.str
}
| UNDOTTED_VARNAME
{
posLast(yylex, yyDollar) // our pos
$$.str = $2.str // don't include the leading $
if $1.str == "" || strings.HasPrefix($1.str, "_") || strings.HasSuffix($1.str, "_") {
yylex.Error(fmt.Sprintf("%s: %s", ErrInvalidVariableName, $1.str))
}
$$.str = $1.str
}
;
capitalized_res_identifier:
Expand Down

0 comments on commit f973818

Please sign in to comment.