Skip to content

Commit 46125a9

Browse files
authoredNov 18, 2024··
feat: KCL Go AST definition and parser API (#408)
Signed-off-by: peefy <xpf6677@163.com>
1 parent 2ffe1ee commit 46125a9

File tree

9 files changed

+3336
-12
lines changed

9 files changed

+3336
-12
lines changed
 

‎example_test.go

+9
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
kcl "kcl-lang.io/kcl-go"
1313
"kcl-lang.io/kcl-go/pkg/native"
14+
"kcl-lang.io/kcl-go/pkg/parser"
1415
"kcl-lang.io/kcl-go/pkg/spec/gpyrpc"
1516
)
1617

@@ -213,6 +214,14 @@ age = option("age")
213214
// name: kcl
214215
}
215216

217+
func ExampleParseFile() {
218+
result, err := parser.ParseFile("testdata/main.k", nil)
219+
if err != nil {
220+
log.Fatal(err)
221+
}
222+
fmt.Println(result)
223+
}
224+
216225
func ExampleParseProgram() {
217226
result, err := kcl.ParseProgram(&kcl.ParseProgramArgs{
218227
Paths: []string{"testdata/main.k"},

‎pkg/ast/ast.go

+30-12
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,43 @@
11
package ast
22

3-
// TODO: add more nodes from https://github.com/kcl-lang/kcl/blob/main/kclvm/ast/src/ast.rs
3+
// Module is an abstract syntax tree for a single KCL file.
4+
type Module struct {
5+
Filename string `json:"filename"`
6+
Pkg string `json:"pkg"`
7+
Doc *Node[string] `json:"doc"`
8+
Body []*Node[Stmt] `json:"body"`
9+
Comments []*Node[Comment] `json:"comments"`
10+
}
411

5-
// Pos denotes the struct tuple (filename, line, column, end_line, end_column).
6-
type Pos struct {
7-
Filename string `json:"filename"`
8-
Line uint64 `json:"line"`
9-
Column uint64 `json:"column"`
10-
EndLine uint64 `json:"end_line"`
11-
EndColumn uint64 `json:"end_column"`
12+
// NewModule creates a new Module instance
13+
func NewModule() *Module {
14+
return &Module{
15+
Body: make([]*Node[Stmt], 0),
16+
Comments: make([]*Node[Comment], 0),
17+
}
1218
}
1319

14-
// Node is the file, line, and column number information that all AST nodes need to contain.
15-
type Node interface {
16-
Pos() Pos
17-
Index() string
20+
// Node is the file, line and column number information that all AST nodes need to contain.
21+
// In fact, column and end_column are the counts of character. For example, `\t` is counted as 1 character,
22+
// so it is recorded as 1 here, but generally col is 4.
23+
type Node[T any] struct {
24+
ID AstIndex `json:"id,omitempty"`
25+
Node T `json:"node,omitempty"`
26+
Pos
1827
}
1928

2029
// AstIndex is the string type used as the unique identifier of an AST node.
type AstIndex string
2231

32+
// Pos is the source span (filename, line, column, end_line, end_column) of an
// AST node. Every field is tagged omitempty, so zero-valued fields are left
// out of the encoded JSON.
type Pos struct {
	Filename string `json:"filename,omitempty"`
	// Start of the span.
	Line   int64 `json:"line,omitempty"`
	Column int64 `json:"column,omitempty"`
	// End of the span.
	EndLine   int64 `json:"end_line,omitempty"`
	EndColumn int64 `json:"end_column,omitempty"`
}
40+
2341
// Comment node.
2442
type Comment struct {
2543
Text string

‎pkg/ast/expr.go

+692
Large diffs are not rendered by default.

‎pkg/ast/json.go

+1,576
Large diffs are not rendered by default.

‎pkg/ast/op.go

+519
Large diffs are not rendered by default.

‎pkg/ast/stmt.go

+282
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
package ast
2+
3+
// Stmt is implemented by every statement node.
type Stmt interface {
	// Type returns the statement's type name as a string.
	Type() string
}
7+
8+
// BaseStmt carries a statement's type name. Statement structs embed it to
// obtain the Type method required by the Stmt interface.
type BaseStmt struct {
	StmtType string `json:"type"`
}

// Type returns the type name stored in StmtType.
func (b BaseStmt) Type() string {
	return b.StmtType
}
16+
17+
// Define all the statement types
18+
type (
19+
// TypeAliasStmt represents a type alias statement, e.g.
20+
//
21+
// type StrOrInt = str | int
22+
TypeAliasStmt struct {
23+
BaseStmt
24+
TypeName *Node[Identifier] `json:"type_name"`
25+
TypeValue *Node[string] `json:"type_value"`
26+
Ty *Node[Type] `json:"ty"`
27+
}
28+
// ExprStmt represents a expression statement, e.g.
29+
//
30+
// 1
31+
//
32+
// """A long string"""
33+
//
34+
// 'A string'
35+
ExprStmt struct {
36+
BaseStmt
37+
Exprs []*Node[Expr] `json:"exprs"`
38+
}
39+
// UnificationStmt represents a declare statement with the union operator, e.g.
40+
//
41+
// data: ASchema {}
42+
UnificationStmt struct {
43+
BaseStmt
44+
Target *Node[Identifier] `json:"target"`
45+
Value *Node[SchemaConfig] `json:"value"`
46+
}
47+
// AssignStmt represents an assignment, e.g.
48+
//
49+
// a: int = 1
50+
//
51+
// a = 1
52+
//
53+
// a = b = 1
54+
AssignStmt struct {
55+
BaseStmt
56+
Targets []*Node[Target] `json:"targets"`
57+
Value *Node[Expr] `json:"value"`
58+
Ty *Node[Type] `json:"ty"`
59+
}
60+
// AugAssignStmt represents an augmented assignment, e.g.
61+
//
62+
// a += 1
63+
//
64+
// a -= 1
65+
AugAssignStmt struct {
66+
BaseStmt
67+
Target *Node[Target] `json:"target"`
68+
Value *Node[Expr] `json:"value"`
69+
Op AugOp `json:"op"`
70+
}
71+
// AssertStmt represents an assert statement, e.g.
72+
//
73+
// assert True if condition, "Assert failed message"
74+
AssertStmt struct {
75+
BaseStmt
76+
Test *Node[Expr] `json:"test"`
77+
IfCond *Node[Expr] `json:"if_cond,omitempty"`
78+
Msg *Node[Expr] `json:"msg,omitempty"`
79+
}
80+
// IfStmt represents an if statement, e.g.
81+
//
82+
// if condition1:
83+
//
84+
// if condition2:
85+
// a = 1
86+
//
87+
// elif condition3:
88+
//
89+
// b = 2
90+
//
91+
// else:
92+
//
93+
// c = 3
94+
IfStmt struct {
95+
BaseStmt
96+
Body []*Node[Stmt] `json:"body"`
97+
Cond *Node[Expr] `json:"cond"`
98+
Orelse []*Node[Stmt] `json:"orelse,omitempty"`
99+
}
100+
// ImportStmt represents an import statement, e.g.
101+
//
102+
// import pkg as pkg_alias
103+
ImportStmt struct {
104+
BaseStmt
105+
Path *Node[string] `json:"path"`
106+
Rawpath string `json:"rawpath"`
107+
Name string `json:"name"`
108+
Asname *Node[string] `json:"asname,omitempty"`
109+
PkgName string `json:"pkg_name"`
110+
}
111+
// SchemaAttr represents schema attribute definitions, e.g.
112+
//
113+
// schema SchemaAttrExample:
114+
//
115+
// x: int
116+
// y: str
117+
SchemaAttr struct {
118+
BaseStmt
119+
Doc string `json:"doc,omitempty"`
120+
Name *Node[string] `json:"name"`
121+
Op AugOp `json:"op,omitempty"`
122+
Value *Node[Expr] `json:"value,omitempty"`
123+
IsOptional bool `json:"is_optional"`
124+
Decorators []*Node[Decorator] `json:"decorators,omitempty"`
125+
Ty *Node[Type] `json:"ty,omitempty"`
126+
}
127+
// SchemaStmt represents a schema statement, e.g.
128+
//
129+
// schema BaseSchema:
130+
//
131+
// schema SchemaExample(BaseSchema)[arg: str]:
132+
//
133+
// """Schema documents"""
134+
// attr?: str = arg
135+
// check:
136+
// len(attr) > 3 if attr, "Check failed message"
137+
//
138+
// mixin MixinExample for ProtocolExample:
139+
//
140+
// attr: int
141+
//
142+
// protocol ProtocolExample:
143+
//
144+
// attr: int
145+
SchemaStmt struct {
146+
BaseStmt
147+
Doc *Node[string] `json:"doc,omitempty"`
148+
Name *Node[string] `json:"name"`
149+
ParentName *Node[Identifier] `json:"parent_name,omitempty"`
150+
ForHostName *Node[Identifier] `json:"for_host_name,omitempty"`
151+
IsMixin bool `json:"is_mixin"`
152+
IsProtocol bool `json:"is_protocol"`
153+
Args *Node[Arguments] `json:"args,omitempty"`
154+
Mixins []*Node[Identifier] `json:"mixins,omitempty"`
155+
Body []*Node[Stmt] `json:"body,omitempty"`
156+
Decorators []*Node[Decorator] `json:"decorators,omitempty"`
157+
Checks []*Node[CheckExpr] `json:"checks,omitempty"`
158+
IndexSignature *Node[SchemaIndexSignature] `json:"index_signature,omitempty"`
159+
}
160+
// RuleStmt represents a rule statement, e.g.
161+
//
162+
// rule RuleExample:
163+
//
164+
// a > 1
165+
// b < 0
166+
RuleStmt struct {
167+
BaseStmt
168+
Doc *Node[string] `json:"doc,omitempty"`
169+
Name *Node[string] `json:"name"`
170+
ParentRules []*Node[Identifier] `json:"parent_rules,omitempty"`
171+
Decorators []*Node[Decorator] `json:"decorators,omitempty"`
172+
Checks []*Node[CheckExpr] `json:"checks,omitempty"`
173+
Args *Node[Arguments] `json:"args,omitempty"`
174+
ForHostName *Node[Identifier] `json:"for_host_name,omitempty"`
175+
}
176+
)
177+
178+
// NewTypeAliasStmt creates a new TypeAliasStmt
179+
func NewTypeAliasStmt() *TypeAliasStmt {
180+
return &TypeAliasStmt{
181+
BaseStmt: BaseStmt{StmtType: "TypeAlias"},
182+
}
183+
}
184+
185+
// NewExprStmt creates a new ExprStmt
186+
func NewExprStmt() *ExprStmt {
187+
return &ExprStmt{
188+
BaseStmt: BaseStmt{StmtType: "Expr"},
189+
Exprs: make([]*Node[Expr], 0),
190+
}
191+
}
192+
193+
// NewUnificationStmt creates a new UnificationStmt
194+
func NewUnificationStmt() *UnificationStmt {
195+
return &UnificationStmt{
196+
BaseStmt: BaseStmt{StmtType: "Unification"},
197+
}
198+
}
199+
200+
// NewAssignStmt creates a new AssignStmt
201+
func NewAssignStmt() *AssignStmt {
202+
return &AssignStmt{
203+
BaseStmt: BaseStmt{StmtType: "Assign"},
204+
}
205+
}
206+
207+
// NewAugAssignStmt creates a new AugAssignStmt
208+
func NewAugAssignStmt() *AugAssignStmt {
209+
return &AugAssignStmt{
210+
BaseStmt: BaseStmt{StmtType: "AugAssign"},
211+
}
212+
}
213+
214+
// NewAssertStmt creates a new AssertStmt
215+
func NewAssertStmt() *AssertStmt {
216+
return &AssertStmt{
217+
BaseStmt: BaseStmt{StmtType: "Assert"},
218+
}
219+
}
220+
221+
// NewIfStmt creates a new IfStmt
222+
func NewIfStmt() *IfStmt {
223+
return &IfStmt{
224+
BaseStmt: BaseStmt{StmtType: "If"},
225+
Body: make([]*Node[Stmt], 0),
226+
Orelse: make([]*Node[Stmt], 0),
227+
}
228+
}
229+
230+
// NewImportStmt creates a new ImportStmt
231+
func NewImportStmt() *ImportStmt {
232+
return &ImportStmt{
233+
BaseStmt: BaseStmt{StmtType: "Import"},
234+
}
235+
}
236+
237+
// NewSchemaAttr creates a new SchemaAttr
238+
func NewSchemaAttr() *SchemaAttr {
239+
return &SchemaAttr{
240+
BaseStmt: BaseStmt{StmtType: "SchemaAttr"},
241+
Decorators: make([]*Node[Decorator], 0),
242+
}
243+
}
244+
245+
// NewSchemaStmt creates a new SchemaStmt
246+
func NewSchemaStmt() *SchemaStmt {
247+
return &SchemaStmt{
248+
BaseStmt: BaseStmt{StmtType: "Schema"},
249+
Mixins: make([]*Node[Identifier], 0),
250+
Body: make([]*Node[Stmt], 0),
251+
Decorators: make([]*Node[Decorator], 0),
252+
Checks: make([]*Node[CheckExpr], 0),
253+
}
254+
}
255+
256+
// NewRuleStmt creates a new RuleStmt
257+
func NewRuleStmt() *RuleStmt {
258+
return &RuleStmt{
259+
BaseStmt: BaseStmt{StmtType: "Rule"},
260+
ParentRules: make([]*Node[Identifier], 0),
261+
Decorators: make([]*Node[Decorator], 0),
262+
Checks: make([]*Node[CheckExpr], 0),
263+
}
264+
}
265+
266+
// SchemaIndexSignature represents a schema index signature, e.g.
267+
//
268+
// schema SchemaIndexSignatureExample:
269+
//
270+
// [str]: int
271+
type SchemaIndexSignature struct {
272+
KeyName *Node[string] `json:"key_name,omitempty"`
273+
Value *Node[Expr] `json:"value,omitempty"`
274+
AnyOther bool `json:"any_other"`
275+
KeyTy *Node[Type] `json:"key_ty,omitempty"`
276+
ValueTy *Node[Type] `json:"value_ty,omitempty"`
277+
}
278+
279+
// NewSchemaIndexSignature creates a new SchemaIndexSignature
280+
func NewSchemaIndexSignature() *SchemaIndexSignature {
281+
return &SchemaIndexSignature{}
282+
}

‎pkg/ast/type.go

+109
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
package ast
2+
3+
// Type is the interface implemented by every AST type node.
type Type interface {
	// TypeName returns the type node's name as a string, e.g. "Named" or "Any".
	TypeName() string
}
7+
8+
// NamedType represents a named type
9+
type NamedType struct {
10+
Value struct {
11+
Identifier *Identifier `json:"identifier"`
12+
} `json:"value"`
13+
}
14+
15+
func (n *NamedType) TypeName() string { return "Named" }
16+
17+
// AnyType is the `any` type. It carries no data.
type AnyType struct{}

// TypeName always returns "Any".
func (*AnyType) TypeName() string {
	return "Any"
}
21+
22+
// BasicType represents a basic type
23+
type BasicType struct {
24+
Value BasicTypeEnum `json:"value"`
25+
}
26+
27+
func (b *BasicType) TypeName() string { return "Basic" }
28+
29+
// BasicTypeEnum names a basic type.
type BasicTypeEnum string

// The values a BasicTypeEnum can take.
const (
	Bool  = BasicTypeEnum("Bool")
	Int   = BasicTypeEnum("Int")
	Float = BasicTypeEnum("Float")
	Str   = BasicTypeEnum("Str")
)
37+
38+
// ListType represents a list type
39+
type ListType struct {
40+
Value struct {
41+
InnerType *Node[Type] `json:"inner_type,omitempty"`
42+
} `json:"value"`
43+
}
44+
45+
func (l *ListType) TypeName() string { return "List" }
46+
47+
// DictType represents a dictionary type
48+
type DictType struct {
49+
Value struct {
50+
KeyType *Node[Type] `json:"key_type,omitempty"`
51+
ValueType *Node[Type] `json:"value_type,omitempty"`
52+
} `json:"value"`
53+
}
54+
55+
func (d *DictType) TypeName() string { return "Dict" }
56+
57+
// UnionType represents a union type
58+
type UnionType struct {
59+
Value struct {
60+
TypeElements []*Node[Type] `json:"type_elements"`
61+
} `json:"value"`
62+
}
63+
64+
func (u *UnionType) TypeName() string { return "Union" }
65+
66+
// LiteralType represents a literal type
67+
type LiteralType struct {
68+
Value LiteralTypeValue `json:"value"`
69+
}
70+
71+
func (l *LiteralType) TypeName() string { return "Literal" }
72+
73+
// LiteralTypeValue is implemented by each kind of literal type value.
type LiteralTypeValue interface {
	// LiteralTypeName returns the literal kind's name, e.g. "Bool" or "Int".
	LiteralTypeName() string
}
77+
78+
// BoolLiteralType is a boolean literal type.
type BoolLiteralType bool

// LiteralTypeName always returns "Bool".
func (*BoolLiteralType) LiteralTypeName() string {
	return "Bool"
}
82+
83+
// IntLiteralType represents an integer literal type
84+
type IntLiteralType struct {
85+
Value int `json:"value"`
86+
Suffix *NumberBinarySuffix `json:"binary_suffix,omitempty"`
87+
}
88+
89+
func (i *IntLiteralType) LiteralTypeName() string { return "Int" }
90+
91+
// FloatLiteralType is a float literal type.
type FloatLiteralType float64

// LiteralTypeName always returns "Float".
func (*FloatLiteralType) LiteralTypeName() string {
	return "Float"
}
95+
96+
// StrLiteralType is a string literal type.
type StrLiteralType string

// LiteralTypeName always returns "Str".
func (*StrLiteralType) LiteralTypeName() string {
	return "Str"
}
100+
101+
// FunctionType represents a function type
102+
type FunctionType struct {
103+
Value struct {
104+
ParamsTy []*Node[Type] `json:"paramsTy,omitempty"`
105+
RetTy *Node[Type] `json:"retTy,omitempty"`
106+
} `json:"value"`
107+
}
108+
109+
func (f *FunctionType) TypeName() string { return "Function" }

‎pkg/parser/parser.go

+20
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
package parser
22

33
import (
4+
"encoding/json"
45
"fmt"
56
"io"
67

8+
"kcl-lang.io/kcl-go/pkg/ast"
79
"kcl-lang.io/kcl-go/pkg/kcl"
810
"kcl-lang.io/kcl-go/pkg/spec/gpyrpc"
911
)
@@ -45,6 +47,24 @@ func ParseFileASTJson(filename string, src interface{}) (result string, err erro
4547
return resp.AstJson, nil
4648
}
4749

50+
// ParseFile parses the source code from the specified file or Reader
51+
// and returns the Go structure representation of the Abstract Syntax
52+
// Tree (AST). The source code can be provided directly as a string or
53+
// []byte, or indirectly via a filename or an io.Reader. If src is nil,
54+
// the function reads the content from the provided filename.
55+
func ParseFile(filename string, src interface{}) (m *ast.Module, err error) {
56+
astJson, err := ParseFileASTJson(filename, src)
57+
if err != nil {
58+
return nil, err
59+
}
60+
m = ast.NewModule()
61+
err = json.Unmarshal([]byte(astJson), m)
62+
if err != nil {
63+
return nil, err
64+
}
65+
return
66+
}
67+
4868
// Parse KCL program with entry files and return the AST JSON string.
4969
func ParseProgram(args *ParseProgramArgs) (*ParseProgramResult, error) {
5070
svc := kcl.Service()

‎pkg/parser/parser_test.go

+99
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,110 @@
11
package parser
22

33
import (
4+
"os"
5+
"path/filepath"
46
"strings"
57
"testing"
68
"time"
9+
10+
"kcl-lang.io/kcl-go/pkg/ast"
711
)
812

13+
// TestParseFile parses an in-memory KCL snippet with ParseFile (empty
// filename, source passed as a string) and spot-checks four statements of the
// resulting module: Body[0], Body[1], Body[8] and Body[9].
//
// NOTE(review): the index expressions and type assertions below are unchecked,
// so an unexpected AST shape panics instead of producing a test error.
func TestParseFile(t *testing.T) {
14+
// Example: Test with string source
15+
src := `schema Name:
16+
name: str
17+
18+
a: int = 1
19+
b = 2Ki
20+
c = 1 + 1
21+
d = "123"
22+
e: 123 = 123
23+
f: "Red" | "Blue" = "Red"
24+
n1 = Name()
25+
n2 = Name {name = "name"}
26+
n3: Name {name = "name"}
27+
schema Container:
28+
name: str = "main"
29+
command?: [str]
30+
ports?: [ContainerPort]
31+
32+
schema Person:
33+
name?: any
34+
35+
version = "dev"
36+
37+
appConfiguration = xxx.xxxAppConfiguration {
38+
mainContainer = container.Main {
39+
readinessProbe = probe_tpl.defaultReadinessProbe
40+
env : [
41+
e.Env {
42+
name: "a"
43+
value: "b"
44+
},
45+
] + values._envs
46+
}
47+
}
48+
` // Sample KCL source code
49+
module, err := ParseFile("", src)
50+
if err != nil {
51+
t.Errorf("ParseFile failed with string source: %v and error: %v", src, err)
52+
}
53+
if module == nil {
54+
t.Errorf("Expected non-empty AST JSON with string source")
55+
} else {
56+
// Body[0] is `schema Name`, which declares exactly one attribute.
schemaStmt := module.Body[0].Node.(*ast.SchemaStmt)
57+
if len(schemaStmt.Body) != 1 {
58+
t.Errorf("wrong schema stmt body count")
59+
}
60+
// Body[1] is `a: int = 1`; its value must be the integer literal 1.
simpleAssignStmt := module.Body[1].Node.(*ast.AssignStmt)
61+
if simpleAssignStmt.Value.Node.(*ast.NumberLit).Value.(*ast.IntNumberLitValue).Value != 1 {
62+
t.Errorf("wrong assign stmt literal value")
63+
}
64+
// Body[8] is `n2 = Name {name = "name"}`: a schema expression with one config item.
schemaAssignStmt := module.Body[8].Node.(*ast.AssignStmt)
65+
if len(schemaAssignStmt.Value.Node.(*ast.SchemaExpr).Config.Node.(*ast.ConfigExpr).Items) != 1 {
66+
t.Errorf("wrong assign stmt schema entry count")
67+
}
68+
// Body[9] is `n3: Name {name = "name"}`: a unification statement with one config item.
schemaUnificationStmt := module.Body[9].Node.(*ast.UnificationStmt)
69+
if len(schemaUnificationStmt.Value.Node.Config.Node.(*ast.ConfigExpr).Items) != 1 {
70+
t.Errorf("wrong assign stmt schema entry count")
71+
}
72+
}
73+
}
74+
75+
func TestParseFileInTheWholeRepo(t *testing.T) {
76+
root := filepath.Join(".", "..", "..")
77+
err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
78+
if err != nil {
79+
return err
80+
}
81+
if !info.IsDir() && strings.HasSuffix(info.Name(), ".k") {
82+
testParseFile(t, path)
83+
}
84+
return nil
85+
})
86+
if err != nil {
87+
t.Errorf("Error walking the path %v: %v", root, err)
88+
}
89+
}
90+
91+
func testParseFile(t *testing.T, path string) {
92+
var err error
93+
var content string
94+
95+
t.Logf("Start parse file: %s", path)
96+
module, err := ParseFile(path, content)
97+
if err != nil {
98+
t.Errorf("ParseFile failed for %s: %v", path, err)
99+
return
100+
}
101+
if module == nil {
102+
t.Errorf("Expected non-empty AST JSON for %s", path)
103+
return
104+
}
105+
t.Logf("Successfully parsed file: %s", path)
106+
}
107+
9108
// TestParseFileASTJson tests the ParseFileASTJson function with various input sources.
10109
func TestParseFileASTJson(t *testing.T) {
11110
// Example: Test with string source

0 commit comments

Comments
 (0)
Please sign in to comment.