diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml new file mode 100644 index 000000000..3b8d477f5 --- /dev/null +++ b/.github/workflows/go.yml @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: GraphAr Go CI + +on: + push: + branches: + - main + paths: + - 'go/graphar/**' + - '.github/workflows/go.yml' + pull_request: + branches: + - main + paths: + - 'go/graphar/**' + - '.github/workflows/go.yml' + +concurrency: + group: ${{ github.repository }}-${{ github.event.number || github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +jobs: + go-test: + name: Go 1.21 + runs-on: ubuntu-latest + if: ${{ !contains(github.event.pull_request.title, 'WIP') && !github.event.pull_request.draft }} + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.21' + cache: true + cache-dependency-path: go/graphar/go.sum + + - name: Go test + working-directory: go/graphar + run: go test ./... diff --git a/go/graphar/Makefile b/go/graphar/Makefile new file mode 100644 index 000000000..605dbc750 --- /dev/null +++ b/go/graphar/Makefile @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +.PHONY: test + +test: + go test ./... + diff --git a/go/graphar/go.mod b/go/graphar/go.mod new file mode 100644 index 000000000..56f5a815d --- /dev/null +++ b/go/graphar/go.mod @@ -0,0 +1,22 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +module github.com/apache/incubator-graphar/go/graphar + +go 1.21 + +require gopkg.in/yaml.v3 v3.0.1 diff --git a/go/graphar/go.sum b/go/graphar/go.sum new file mode 100644 index 000000000..a62c313c5 --- /dev/null +++ b/go/graphar/go.sum @@ -0,0 +1,4 @@ +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/go/graphar/graphar.go b/go/graphar/graphar.go new file mode 100644 index 000000000..767e837fd --- /dev/null +++ b/go/graphar/graphar.go @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package graphar + +import ( + "context" + "fmt" + + "github.com/apache/incubator-graphar/go/graphar/info" +) + +// FileSystem defines a minimal interface for reading files. +// It matches the requirements for loading GraphAr info files. +type FileSystem interface { + // ReadFile reads the named file and returns the contents. + ReadFile(ctx context.Context, name string) ([]byte, error) +} + +// GraphAr is the main entry point for using the GraphAr Go SDK. +type GraphAr interface { + // LoadGraphInfo loads a graph-level Info YAML (plus referenced vertex/edge info files) + // using the configured FileSystem. + LoadGraphInfo(ctx context.Context, path string) (*info.GraphInfo, error) +} + +type client struct { + fs FileSystem +} + +// option configures the GraphAr instance. +type option func(*client) + +// WithFileSystem sets a custom FileSystem for the GraphAr instance. +func WithFileSystem(fs FileSystem) option { + return func(c *client) { + c.fs = fs + } +} + +// New creates a new GraphAr instance. +func New(opts ...option) GraphAr { + c := &client{} + for _, opt := range opts { + if opt != nil { + opt(c) + } + } + return c +} + +// LoadGraphInfo loads a graph-level Info YAML (plus referenced vertex/edge +// info files) using the configured FileSystem. +func (c *client) LoadGraphInfo(ctx context.Context, path string) (*info.GraphInfo, error) { + var g *info.GraphInfo + var err error + + if c.fs != nil { + // If custom FS is provided, we need to load via reader + b, err := c.fs.ReadFile(ctx, path) + if err != nil { + return nil, fmt.Errorf("graphar: read graph info %q from fs: %w", path, err) + } + // We'll need to update info.LoadGraphInfo to accept a loader or use a simplified version + g, err = info.LoadGraphInfoFromBytes(b, path, func(p string) ([]byte, error) { + return c.fs.ReadFile(ctx, p) + }) + } else { + g, err = info.LoadGraphInfo(path) + } + + if err != nil { + return nil, fmt.Errorf("graphar: load graph info %q: %w", path, err) + } + return g, nil +} diff --git a/go/graphar/graphar_client_test.go b/go/graphar/graphar_client_test.go new file mode 100644 index 000000000..746186a57 --- /dev/null +++ b/go/graphar/graphar_client_test.go @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package graphar + +import ( + "context" + "path/filepath" + "runtime" + "testing" +) + +func getRepoRoot() string { + _, filename, _, _ := runtime.Caller(0) + // current file: go/graphar/graphar_client_test.go + return filepath.Join(filepath.Dir(filename), "..", "..") +} + +func TestNewClient(t *testing.T) { + t.Run("default", func(t *testing.T) { + c := New() + if c == nil { + t.Fatalf("expected non-nil client") + } + }) + + t.Run("with options", func(t *testing.T) { + called := false + opt := func(c *client) { + called = true + } + c := New(opt, nil) + if c == nil { + t.Fatalf("expected non-nil client") + } + if !called { + t.Errorf("expected option to be called") + } + }) +} + +func TestClientLoadGraphInfoErrors(t *testing.T) { + c := New() + _, err := c.LoadGraphInfo(context.Background(), "non-existent-path.yml") + if err == nil { + t.Errorf("expected error for non-existent path") + } +} + +func TestClientLoadGraphInfo(t *testing.T) { + root := getRepoRoot() + graphPath := filepath.Join(root, "testing", "modern_graph", "modern_graph.graph.yml") + + c := New() + g, err := c.LoadGraphInfo(context.Background(), graphPath) + if err != nil { + t.Fatalf("LoadGraphInfo failed: %v", err) + } + if g.Name != "modern_graph" { + t.Fatalf("unexpected graph name: %s", g.Name) + } + if !g.HasVertex("person") { + t.Fatalf("expected graph to have vertex person") + } + if !g.HasEdge("person", "knows", "person") { + t.Fatalf("expected graph to have edge person-knows-person") + } +} diff --git a/go/graphar/graphar_test.go b/go/graphar/graphar_test.go new file mode 100644 index 000000000..eefbd264a --- /dev/null +++ b/go/graphar/graphar_test.go @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package graphar + +import ( + "context" + "errors" + "testing" +) + +func TestNew(t *testing.T) { + t.Parallel() + + c := New() + if c == nil { + t.Fatalf("New() returned nil") + } +} + +type mockFS struct { + files map[string][]byte +} + +func (m *mockFS) ReadFile(ctx context.Context, name string) ([]byte, error) { + if b, ok := m.files[name]; ok { + return b, nil + } + return nil, errors.New("file not found") +} + +func TestClientWithFileSystem(t *testing.T) { + fs := &mockFS{ + files: map[string][]byte{ + "graph.yaml": []byte(` +name: test_graph +prefix: /tmp/ +vertices: [v.yaml] +edges: [] +version: 0.1.0 +`), + "v.yaml": []byte(` +type: person +chunk_size: 100 +prefix: person/ +property_groups: [] +version: 0.1.0 +`), + }, + } + + c := New(WithFileSystem(fs)) + g, err := c.LoadGraphInfo(context.Background(), "graph.yaml") + if err != nil { + t.Fatalf("LoadGraphInfo failed: %v", err) + } + + if g.Name != "test_graph" { + t.Errorf("expected name test_graph, got %s", g.Name) + } + if len(g.VertexInfos) != 1 { + t.Errorf("expected 1 vertex info, got %d", len(g.VertexInfos)) + } +} diff --git a/go/graphar/info/edge_info.go b/go/graphar/info/edge_info.go new file mode 100644 index 000000000..016106797 --- /dev/null +++ b/go/graphar/info/edge_info.go @@ -0,0 +1,288 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +import ( + "fmt" + "net/url" + "strings" +) + +// AdjacentList describes one adjacency list layout of an edge type. +type AdjacentList struct { + Type AdjListType `yaml:"type"` + FileType FileType `yaml:"file_type"` + Prefix string `yaml:"prefix"` + Ordered bool `yaml:"ordered,omitempty"` + Aligned string `yaml:"aligned_by,omitempty"` // "src" or "dst" + + baseURI *url.URL `yaml:"-"` +} + +func (a *AdjacentList) Validate() error { + if a.FileType == "" { + return fmt.Errorf("%w: adjacent list file type is empty", ErrMissingField) + } + if !a.FileType.IsValid() { + return fmt.Errorf("%w: invalid file type %q for adjacent list %q", ErrInvalid, a.FileType, a.Type) + } + return nil +} + +func (a *AdjacentList) GetPrefix() *url.URL { + return a.baseURI +} + +// EdgeInfo describes metadata for an edge type. +type EdgeInfo struct { + SrcType string `yaml:"src_type"` + EdgeType string `yaml:"edge_type"` + DstType string `yaml:"dst_type"` + ChunkSize int64 `yaml:"chunk_size"` + SrcChunkSize int64 `yaml:"src_chunk_size"` + DstChunkSize int64 `yaml:"dst_chunk_size"` + Directed bool `yaml:"directed"` + BaseURI *url.URL `yaml:"-"` + Prefix string `yaml:"prefix"` + Adjacent []*AdjacentList `yaml:"adj_lists"` + PropertyGroups []*PropertyGroup `yaml:"property_groups"` + Version VersionInfo `yaml:"version"` + + pgroups propertyGroups `yaml:"-"` + adjMap map[AdjListType]*AdjacentList `yaml:"-"` +} + +func (e *EdgeInfo) IsDirected() bool { + return e.Directed +} + +func (e *EdgeInfo) GetPrefix() *url.URL { + return e.BaseURI +} + +func (e *EdgeInfo) HasAdjListType(adjListType AdjListType) bool { + _, ok := e.adjMap[adjListType] + return ok +} + +func (e *EdgeInfo) HasProperty(propertyName string) bool { + return e.pgroups.hasProperty(propertyName) +} + +func (e *EdgeInfo) IsPrimaryKey(propertyName string) (bool, error) { + return e.pgroups.isPrimaryKey(propertyName) +} + +func (e *EdgeInfo) IsNullableKey(propertyName string) (bool, error) { + return e.pgroups.isNullableKey(propertyName) +} + +func (e *EdgeInfo) GetPropertyType(propertyName string) (DataType, error) { + return e.pgroups.propertyType(propertyName) +} + +func (e *EdgeInfo) GetPropertyGroup(propertyName string) (*PropertyGroup, error) { + return e.pgroups.propertyGroupFor(propertyName) +} + +func (e *EdgeInfo) GetAdjacentList(adjListType AdjListType) (*AdjacentList, error) { + adj, ok := e.adjMap[adjListType] + if !ok { + return nil, fmt.Errorf("%w: adjacent list type %s not found", ErrNotFound, adjListType) + } + return adj, nil +} + +func (e *EdgeInfo) GetAdjacentListURI(adjListType AdjListType) (*url.URL, error) { + adj, err := e.GetAdjacentList(adjListType) + if err != nil { + return nil, err + } + base := e.GetPrefix() + if base == nil { + return nil, fmt.Errorf("%w: edge base uri is nil", ErrInvalid) + } + rel := adj.GetPrefix() + if rel == nil { + return nil, fmt.Errorf("%w: adjacent list prefix is nil", ErrInvalid) + } + return base.ResolveReference(rel).JoinPath(pathAdjList), nil +} + +func (e *EdgeInfo) GetAdjacentListChunkURI(adjListType AdjListType, vertexChunkIndex int64) (*url.URL, error) { + uri, err := e.GetAdjacentListURI(adjListType) + if err != nil { + return nil, err + } + return uri.JoinPath(fmt.Sprintf("%s%d", chunkPrefix, vertexChunkIndex)), nil +} + +func (e *EdgeInfo) GetOffsetURI(adjListType AdjListType) (*url.URL, error) { + uri, err := e.GetAdjacentListURI(adjListType) + if err != nil { + return nil, err + } + return uri.JoinPath(pathOffset), nil +} + +func (e *EdgeInfo) GetOffsetChunkURI(adjListType AdjListType, vertexChunkIndex int64) (*url.URL, error) { + uri, err := e.GetOffsetURI(adjListType) + if err != nil { + return nil, err + } + return uri.JoinPath(fmt.Sprintf("%s%d", chunkPrefix, vertexChunkIndex)), nil +} + +func (e *EdgeInfo) GetVerticesNumFileURI(adjListType AdjListType) (*url.URL, error) { + uri, err := e.GetAdjacentListURI(adjListType) + if err != nil { + return nil, err + } + return uri.JoinPath(fileVertexCount), nil +} + +func (e *EdgeInfo) GetEdgesNumFileURI(adjListType AdjListType, vertexChunkIndex int64) (*url.URL, error) { + uri, err := e.GetAdjacentListURI(adjListType) + if err != nil { + return nil, err + } + return uri.JoinPath(fmt.Sprintf("%s%d", fileEdgeCountPrefix, vertexChunkIndex)), nil +} + +func (e *EdgeInfo) GetPropertyGroupURI(pg *PropertyGroup) (*url.URL, error) { + base := e.GetPrefix() + if base == nil { + return nil, fmt.Errorf("%w: edge base uri is nil", ErrInvalid) + } + rel := pg.GetPrefix() + if rel == nil { + return nil, fmt.Errorf("%w: property group prefix is nil", ErrInvalid) + } + return base.ResolveReference(rel), nil +} + +func (e *EdgeInfo) GetPropertyGroupChunkURI(pg *PropertyGroup, chunkIndex int64) (*url.URL, error) { + uri, err := e.GetPropertyGroupURI(pg) + if err != nil { + return nil, err + } + return uri.JoinPath(fmt.Sprintf("%s%d", chunkPrefix, chunkIndex)), nil +} + +// concat builds the concatenated edge label, mirroring Java/C++. +func concat(srcLabel, edgeLabel, dstLabel string) string { + return srcLabel + regularSeparator + edgeLabel + regularSeparator + dstLabel +} + +// Key returns the concat key of this edge info. +func (e *EdgeInfo) Key() string { + return concat(e.SrcType, e.EdgeType, e.DstType) +} + +// Init builds internal indices after unmarshalling or manual construction. +func (e *EdgeInfo) Init() error { + if e.Prefix != "" && e.BaseURI == nil { + p := e.Prefix + if !strings.HasSuffix(p, "/") { + p += "/" + } + u, err := url.Parse(p) + if err != nil { + return fmt.Errorf("%w: invalid edge base uri %q: %v", ErrInvalid, e.Prefix, err) + } + e.BaseURI = u + } + e.adjMap = make(map[AdjListType]*AdjacentList) + for i := range e.Adjacent { + adj := e.Adjacent[i] + if adj.Prefix != "" && adj.baseURI == nil { + u, err := url.Parse(adj.Prefix) + if err != nil { + return fmt.Errorf("%w: invalid adjacent list prefix %q: %v", ErrInvalid, adj.Prefix, err) + } + adj.baseURI = u + } + if adj.Type == "" { + switch { + case adj.Ordered && adj.Aligned == alignedBySrc: + adj.Type = AdjListOrderedBySource + case adj.Ordered && adj.Aligned == alignedByDst: + adj.Type = AdjListOrderedByDest + case !adj.Ordered && adj.Aligned == alignedBySrc: + adj.Type = AdjListUnorderedBySource + case !adj.Ordered && adj.Aligned == alignedByDst: + adj.Type = AdjListUnorderedByDest + } + } + if adj.Type != "" { + e.adjMap[adj.Type] = adj + } + } + for _, g := range e.PropertyGroups { + if err := g.Init(); err != nil { + return err + } + } + e.pgroups = newPropertyGroups(e.PropertyGroups) + return nil +} + +// Validate checks that the EdgeInfo is structurally valid. +func (e *EdgeInfo) Validate() error { + if e.SrcType == "" || e.EdgeType == "" || e.DstType == "" { + return fmt.Errorf("%w: edge src/edge/dst type must be non-empty", ErrMissingField) + } + if e.ChunkSize <= 0 || e.SrcChunkSize <= 0 || e.DstChunkSize <= 0 { + return fmt.Errorf("%w: edge %s has non-positive chunk sizes", ErrInvalid, e.Key()) + } + if e.BaseURI == nil && e.Prefix == "" { + return fmt.Errorf("%w: edge base uri/prefix is empty", ErrMissingField) + } + if len(e.Adjacent) == 0 { + return fmt.Errorf("%w: edge %s has no adjacent list definitions", ErrMissingField, e.Key()) + } + for _, a := range e.Adjacent { + if a.Type == "" { + return fmt.Errorf("%w: edge %s has adjacent list with empty type", ErrMissingField, e.Key()) + } + if !a.Type.IsValid() { + return fmt.Errorf("%w: invalid adjacent list type %q for edge %s", ErrInvalid, a.Type, e.Key()) + } + if err := a.Validate(); err != nil { + return fmt.Errorf("%w: edge %s adjacent list %q: %v", ErrInvalid, e.Key(), a.Type, err) + } + } + + // property groups non-empty and unique, with SINGLE cardinality (Java rule) + seen := make(map[string]struct{}) + for _, g := range e.PropertyGroups { + if err := g.Validate(); err != nil { + return fmt.Errorf("%w: edge %s property group: %v", ErrInvalid, e.Key(), err) + } + for _, p := range g.Properties { + if p.Cardinality != "" && p.Cardinality != CardinalitySingle { + return fmt.Errorf("%w: edge %s property %s must have single cardinality", ErrInvalid, e.Key(), p.Name) + } + if _, ok := seen[p.Name]; ok { + return fmt.Errorf("%w: edge %s has duplicated property %q", ErrDuplicate, e.Key(), p.Name) + } + seen[p.Name] = struct{}{} + } + } + return nil +} diff --git a/go/graphar/info/edge_info_test.go b/go/graphar/info/edge_info_test.go new file mode 100644 index 000000000..6e9c736e0 --- /dev/null +++ b/go/graphar/info/edge_info_test.go @@ -0,0 +1,147 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +import ( + "testing" +) + +func TestEdgeInfo(t *testing.T) { + e := &EdgeInfo{ + SrcType: "person", + EdgeType: "knows", + DstType: "person", + ChunkSize: 100, + SrcChunkSize: 100, + DstChunkSize: 100, + Directed: true, + Prefix: "person_knows_person/", + Adjacent: []*AdjacentList{ + { + Type: AdjListOrderedBySource, + FileType: FileTypeParquet, + }, + }, + Version: VersionInfo{Major: 0, Minor: 1, Patch: 0}, + } + + t.Run("concat", func(t *testing.T) { + got := concat("a", "b", "c") + expected := "a" + regularSeparator + "b" + regularSeparator + "c" + if got != expected { + t.Errorf("concat() = %v, want %v", got, expected) + } + }) + + t.Run("key", func(t *testing.T) { + got := e.Key() + expected := "person" + regularSeparator + "knows" + regularSeparator + "person" + if got != expected { + t.Errorf("Key() = %v, want %v", got, expected) + } + }) + + t.Run("init", func(t *testing.T) { + e.PropertyGroups = []*PropertyGroup{ + { + Properties: []*Property{{Name: "weight", DataType: DataTypeDouble}}, + FileType: FileTypeParquet, + }, + } + if err := e.Init(); err != nil { + t.Fatalf("init failed: %v", err) + } + if !e.pgroups.hasProperty("weight") { + t.Errorf("expected to have property weight after init") + } + }) + + t.Run("validate", func(t *testing.T) { + if err := e.Validate(); err != nil { + t.Errorf("validate failed: %v", err) + } + + // empty types + eErr := *e + eErr.SrcType = "" + if err := eErr.Validate(); err == nil { + t.Error("expected error for empty src/edge/dst type") + } + + // non-positive chunk sizes + eErr = *e + eErr.ChunkSize = 0 + if err := eErr.Validate(); err == nil { + t.Error("expected error for non-positive chunk_size") + } + + // base uri/prefix empty + eErr = *e + eErr.Prefix = "" + eErr.BaseURI = nil + if err := eErr.Validate(); err == nil { + t.Error("expected error for empty base uri/prefix") + } + + // missing Adjacent + eErr = *e + eErr.Adjacent = nil + if err := eErr.Validate(); err == nil { + t.Error("expected error for missing adjacent list definitions") + } + + // adjacent list with empty type + eErr = *e + eErr.Adjacent = []*AdjacentList{{Type: ""}} + if err := eErr.Validate(); err == nil { + t.Error("expected error for adjacent list with empty type") + } + + // property with empty name + eErr = *e + eErr.PropertyGroups = []*PropertyGroup{{Properties: []*Property{{Name: "", DataType: DataTypeInt64}}, FileType: FileTypeParquet}} + if err := eErr.Validate(); err == nil { + t.Error("expected error for empty property name") + } + + // duplicated property name + eErr = *e + eErr.PropertyGroups = []*PropertyGroup{{ + Properties: []*Property{ + {Name: "p", DataType: DataTypeInt64}, + {Name: "p", DataType: DataTypeInt64}, + }, + FileType: FileTypeParquet, + }} + if err := eErr.Validate(); err == nil { + t.Error("expected error for duplicated property") + } + + // cardinality must be single + eErr = *e + eErr.PropertyGroups = []*PropertyGroup{{ + Properties: []*Property{ + {Name: "p", DataType: DataTypeInt64, Cardinality: CardinalityList}, + }, + FileType: FileTypeParquet, + }} + if err := eErr.Validate(); err == nil { + t.Error("expected error for non-single cardinality") + } + }) +} diff --git a/go/graphar/info/errors.go b/go/graphar/info/errors.go new file mode 100644 index 000000000..437c9d002 --- /dev/null +++ b/go/graphar/info/errors.go @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +import "errors" + +var ( + // ErrNotFound indicates a requested info object (e.g. vertex type, edge triplet) doesn't exist. + ErrNotFound = errors.New("graphar info: not found") + + // ErrInvalid indicates an info object is structurally invalid. + ErrInvalid = errors.New("graphar info: invalid") + + // ErrParse indicates parsing failed (e.g. YAML decoding error). + ErrParse = errors.New("graphar info: parse error") + + // ErrUnsupported indicates an unsupported feature or version. + ErrUnsupported = errors.New("graphar info: unsupported") + + // ErrMissingField indicates a required field is missing in YAML. + ErrMissingField = errors.New("graphar info: missing field") + + // ErrDuplicate indicates a duplicated entity (e.g. property name, label). + ErrDuplicate = errors.New("graphar info: duplicate") +) diff --git a/go/graphar/info/graph_info.go b/go/graphar/info/graph_info.go new file mode 100644 index 000000000..664011536 --- /dev/null +++ b/go/graphar/info/graph_info.go @@ -0,0 +1,213 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Package info contains GraphAr metadata models and utilities for +// parsing and serializing Info YAML files (graph, vertex, edge). +// +// The design is inspired by the Java info module, but follows +// idiomatic Go conventions (errors instead of exceptions, simple +// struct types, and explicit validation). + +package info + +import ( + "fmt" + "net/url" +) + +// concatEdgeTriplet joins src/edge/dst types into a stable key. +func concatEdgeTriplet(srcType, edgeType, dstType string) string { + return srcType + regularSeparator + edgeType + regularSeparator + dstType +} + +// GraphInfo aggregates metadata for a whole graph: vertices, edges, and layout. +type GraphInfo struct { + Name string `yaml:"name"` + Prefix string `yaml:"prefix"` + Version VersionInfo `yaml:"version"` + + VertexInfos []*VertexInfo `yaml:"-"` + EdgeInfos []*EdgeInfo `yaml:"-"` + + BaseURI *url.URL `yaml:"-"` + + vertexByType map[string]*VertexInfo + edgeByKey map[string]*EdgeInfo + types2StoreURI map[string]*url.URL +} + +func (g *GraphInfo) GetPrefix() *url.URL { + return g.BaseURI +} + +func (g *GraphInfo) HasVertexInfo(vertexType string) bool { + _, ok := g.vertexByType[vertexType] + return ok +} + +func (g *GraphInfo) HasEdgeInfo(srcType, edgeType, dstType string) bool { + _, ok := g.edgeByKey[concatEdgeTriplet(srcType, edgeType, dstType)] + return ok +} + +func (g *GraphInfo) GetVertexInfo(vertexType string) (*VertexInfo, error) { + v, ok := g.vertexByType[vertexType] + if !ok { + return nil, fmt.Errorf("%w: vertex type %q", ErrNotFound, vertexType) + } + return v, nil +} + +func (g *GraphInfo) GetEdgeInfo(srcType, edgeType, dstType string) (*EdgeInfo, error) { + k := concatEdgeTriplet(srcType, edgeType, dstType) + e, ok := g.edgeByKey[k] + if !ok { + return nil, fmt.Errorf("%w: edge triplet %q", ErrNotFound, k) + } + return e, nil +} + +func (g *GraphInfo) SetStoreURIForVertex(vertexType string, storeURI *url.URL) error { + if storeURI == nil { + return fmt.Errorf("%w: storeURI is nil", ErrInvalid) + } + if !g.HasVertexInfo(vertexType) { + return fmt.Errorf("%w: vertex type %q", ErrNotFound, vertexType) + } + if g.types2StoreURI == nil { + g.types2StoreURI = make(map[string]*url.URL) + } + g.types2StoreURI[vertexType] = storeURI + return nil +} + +func (g *GraphInfo) SetStoreURIForEdge(srcType, edgeType, dstType string, storeURI *url.URL) error { + if storeURI == nil { + return fmt.Errorf("%w: storeURI is nil", ErrInvalid) + } + k := concatEdgeTriplet(srcType, edgeType, dstType) + if _, ok := g.edgeByKey[k]; !ok { + return fmt.Errorf("%w: edge triplet %q", ErrNotFound, k) + } + if g.types2StoreURI == nil { + g.types2StoreURI = make(map[string]*url.URL) + } + g.types2StoreURI[k] = storeURI + return nil +} + +func (g *GraphInfo) GetStoreURIForVertex(vertexType string) (*url.URL, error) { + if g.types2StoreURI == nil { + return nil, fmt.Errorf("%w: store uri not set", ErrNotFound) + } + u, ok := g.types2StoreURI[vertexType] + if !ok { + return nil, fmt.Errorf("%w: vertex type %q", ErrNotFound, vertexType) + } + return u, nil +} + +func (g *GraphInfo) GetStoreURIForEdge(srcType, edgeType, dstType string) (*url.URL, error) { + k := concatEdgeTriplet(srcType, edgeType, dstType) + if g.types2StoreURI == nil { + return nil, fmt.Errorf("%w: store uri not set", ErrNotFound) + } + u, ok := g.types2StoreURI[k] + if !ok { + return nil, fmt.Errorf("%w: edge triplet %q", ErrNotFound, k) + } + return u, nil +} + +func (g *GraphInfo) GetTypes2StoreURI() map[string]*url.URL { + out := make(map[string]*url.URL, len(g.types2StoreURI)) + for k, v := range g.types2StoreURI { + out[k] = v + } + return out +} + +// Init builds internal indices after unmarshalling or manual construction. +func (g *GraphInfo) Init() error { + if g.Prefix != "" && g.BaseURI == nil { + u, err := url.Parse(g.Prefix) + if err != nil { + return fmt.Errorf("%w: invalid graph base uri %q: %v", ErrInvalid, g.Prefix, err) + } + g.BaseURI = u + } + + g.vertexByType = make(map[string]*VertexInfo, len(g.VertexInfos)) + for _, v := range g.VertexInfos { + if err := v.Init(); err != nil { + return err + } + g.vertexByType[v.Type] = v + } + + g.edgeByKey = make(map[string]*EdgeInfo, len(g.EdgeInfos)) + for _, e := range g.EdgeInfos { + if err := e.Init(); err != nil { + return err + } + g.edgeByKey[e.Key()] = e + } + + if g.types2StoreURI == nil { + g.types2StoreURI = make(map[string]*url.URL) + } + return nil +} + +// Validate checks that the GraphInfo is structurally valid. +func (g *GraphInfo) Validate() error { + if g.Name == "" { + return fmt.Errorf("%w: graph name is empty", ErrMissingField) + } + if g.BaseURI == nil && g.Prefix == "" { + return fmt.Errorf("%w: graph base uri/prefix is empty", ErrMissingField) + } + if len(g.VertexInfos) != len(g.vertexByType) { + return fmt.Errorf("%w: vertex type map size mismatch (possible duplicate type)", ErrInvalid) + } + if len(g.EdgeInfos) != len(g.edgeByKey) { + return fmt.Errorf("%w: edge concat map size mismatch (possible duplicate edge)", ErrInvalid) + } + for _, v := range g.VertexInfos { + if err := v.Validate(); err != nil { + return err + } + } + for _, e := range g.EdgeInfos { + if err := e.Validate(); err != nil { + return err + } + } + return nil +} + +// Compatibility helpers (keep old method names temporarily during refactor). +func (g *GraphInfo) HasVertex(t string) bool { return g.HasVertexInfo(t) } +func (g *GraphInfo) HasEdge(src, edge, dst string) bool { return g.HasEdgeInfo(src, edge, dst) } +func (g *GraphInfo) VertexInfoByType(t string) (*VertexInfo, bool) { + v, ok := g.vertexByType[t] + return v, ok +} +func (g *GraphInfo) EdgeInfoByTypes(src, edge, dst string) (*EdgeInfo, bool) { + e, ok := g.edgeByKey[concatEdgeTriplet(src, edge, dst)] + return e, ok +} diff --git a/go/graphar/info/graph_info_test.go b/go/graphar/info/graph_info_test.go new file mode 100644 index 000000000..28a9af3c3 --- /dev/null +++ b/go/graphar/info/graph_info_test.go @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +import ( + "testing" +) + +func TestGraphInfo(t *testing.T) { + vi := &VertexInfo{ + Type: "person", + ChunkSize: 100, + Prefix: "person/", + Version: VersionInfo{Major: 0, Minor: 1, Patch: 0}, + } + ei := &EdgeInfo{ + SrcType: "person", + EdgeType: "knows", + DstType: "person", + ChunkSize: 100, + SrcChunkSize: 100, + DstChunkSize: 100, + Prefix: "person_knows_person/", + Adjacent: []*AdjacentList{ + {Type: AdjListOrderedBySource, FileType: FileTypeParquet}, + }, + Version: VersionInfo{Major: 0, Minor: 1, Patch: 0}, + } + + g := &GraphInfo{ + Name: "test_graph", + Prefix: "/tmp/graphar/", + VertexInfos: []*VertexInfo{vi}, + EdgeInfos: []*EdgeInfo{ei}, + Version: VersionInfo{Major: 0, Minor: 1, Patch: 0}, + } + + t.Run("init", func(t *testing.T) { + if err := g.Init(); err != nil { + t.Fatalf("init failed: %v", err) + } + }) + + t.Run("validate", func(t *testing.T) { + if err := g.Validate(); err != nil { + t.Errorf("validate failed: %v", err) + } + + invalidG := &GraphInfo{Name: ""} + if err := invalidG.Validate(); err == nil { + t.Errorf("expected error for empty Name") + } + }) + + t.Run("Queries", func(t *testing.T) { + if !g.HasVertexInfo("person") { + t.Errorf("expected to have vertex person") + } + if g.HasVertexInfo("robot") { + t.Errorf("did not expect to have vertex robot") + } + + if !g.HasEdgeInfo("person", "knows", "person") { + t.Errorf("expected to have edge person-knows-person") + } + + vInfo, err := g.GetVertexInfo("person") + if err != nil || vInfo.Type != "person" { + t.Errorf("GetVertexInfo failed") + } + + eInfo, err := g.GetEdgeInfo("person", "knows", "person") + if err != nil || eInfo.EdgeType != "knows" { + t.Errorf("GetEdgeInfo failed") + } + }) +} diff --git a/go/graphar/info/integration_test.go b/go/graphar/info/integration_test.go new file mode 100644 index 000000000..a24d8725e --- /dev/null +++ b/go/graphar/info/integration_test.go @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +import ( + "path/filepath" + "runtime" + "testing" +) + +func getRepoRoot() string { + _, filename, _, _ := runtime.Caller(0) + // current file: go/graphar/info/integration_test.go + // go/graphar/info -> go/graphar -> go -> root + return filepath.Join(filepath.Dir(filename), "..", "..", "..") +} + +func TestLoadIntegrationData(t *testing.T) { + root := getRepoRoot() + + t.Run("LoadLdbcSampleCSV", func(t *testing.T) { + path := filepath.Join(root, "testing", "ldbc_sample", "csv", "ldbc_sample.graph.yml") + g, err := LoadGraphInfo(path) + if err != nil { + t.Fatalf("failed to load ldbc_sample graph: %v", err) + } + if g.Name != "ldbc_sample" { + t.Errorf("expected graph name ldbc_sample, got %s", g.Name) + } + if !g.HasVertexInfo("person") { + t.Errorf("expected person vertex") + } + if !g.HasEdgeInfo("person", "knows", "person") { + t.Errorf("expected person-knows-person edge") + } + }) + + t.Run("LoadModernGraph", func(t *testing.T) { + path := filepath.Join(root, "testing", "modern_graph", "modern_graph.graph.yml") + g, err := LoadGraphInfo(path) + if err != nil { + t.Fatalf("failed to load modern_graph: %v", err) + } + if g.Name != "modern_graph" { + t.Errorf("expected graph name modern_graph, got %s", g.Name) + } + }) + + t.Run("LoadVertexInfoDirectly", func(t *testing.T) { + path := filepath.Join(root, "testing", "ldbc_sample", "csv", "person.vertex.yml") + v, err := LoadVertexInfo(path) + if err != nil { + t.Fatalf("failed to load vertex info: %v", err) + } + if v.Type != "person" { + t.Errorf("expected person vertex type, got %s", v.Type) + } + }) + + t.Run("LoadEdgeInfoDirectly", func(t *testing.T) { + path := filepath.Join(root, "testing", "ldbc_sample", "csv", "person_knows_person.edge.yml") + e, err := LoadEdgeInfo(path) + if err != nil { + t.Fatalf("failed to load edge info: %v", err) + } + if e.EdgeType != "knows" { + t.Errorf("expected knows edge type, got %s", e.EdgeType) + } + }) +} diff --git a/go/graphar/info/property.go b/go/graphar/info/property.go new file mode 100644 index 000000000..5fc0016b2 --- /dev/null +++ b/go/graphar/info/property.go @@ -0,0 +1,189 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +import ( + "fmt" + "net/url" + "reflect" +) + +// Property describes a single property in a PropertyGroup. +type Property struct { + Name string `yaml:"name"` + DataType DataType `yaml:"data_type"` + Cardinality Cardinality `yaml:"cardinality,omitempty"` + Primary bool `yaml:"is_primary"` + Nullable bool `yaml:"is_nullable"` +} + +// PropertyGroup groups properties stored together with the same file type and base URI. +type PropertyGroup struct { + Properties []*Property `yaml:"properties"` + FileType FileType `yaml:"file_type"` + // Prefix where this group's files are stored, relative to the vertex/edge base URI. + Prefix string `yaml:"prefix"` + + baseURI *url.URL `yaml:"-"` +} + +func (pg *PropertyGroup) Validate() error { + if pg.FileType == "" { + return fmt.Errorf("%w: property group file type is empty", ErrMissingField) + } + if !pg.FileType.IsValid() { + return fmt.Errorf("%w: invalid file type %q", ErrInvalid, pg.FileType) + } + if len(pg.Properties) == 0 { + return fmt.Errorf("%w: property group has no properties", ErrMissingField) + } + for _, p := range pg.Properties { + if p.Name == "" { + return fmt.Errorf("%w: property name is empty", ErrMissingField) + } + if p.DataType != "" && !p.DataType.IsValid() { + return fmt.Errorf("%w: invalid data type %q for property %q", ErrInvalid, p.DataType, p.Name) + } + if p.Cardinality != "" && !p.Cardinality.IsValid() { + return fmt.Errorf("%w: invalid cardinality %q for property %q", ErrInvalid, p.Cardinality, p.Name) + } + } + return nil +} + +func (pg *PropertyGroup) Init() error { + if pg.Prefix != "" && pg.baseURI == nil { + u, err := url.Parse(pg.Prefix) + if err != nil { + return fmt.Errorf("%w: invalid property group prefix %q: %v", ErrInvalid, pg.Prefix, err) + } + pg.baseURI = u + } + return nil +} + +func (pg *PropertyGroup) GetPrefix() *url.URL { + return pg.baseURI +} + +func (pg *PropertyGroup) HasProperty(propertyName string) bool { + for _, p := range pg.Properties { + if p.Name == propertyName { + return true + } + } + return false +} + +// propertyGroups is an internal helper mirroring the Java PropertyGroups class. +type propertyGroups struct { + groups []*PropertyGroup + propByName map[string]*Property + groupByKey map[string]*PropertyGroup +} + +// newPropertyGroups builds a propertyGroups helper from a list of groups. +func newPropertyGroups(groups []*PropertyGroup) propertyGroups { + pgs := propertyGroups{ + groups: make([]*PropertyGroup, len(groups)), + propByName: make(map[string]*Property), + groupByKey: make(map[string]*PropertyGroup), + } + copy(pgs.groups, groups) + + for _, g := range groups { + for _, p := range g.Properties { + pgs.propByName[p.Name] = p + pgs.groupByKey[p.Name] = g + } + } + return pgs +} + +// hasProperty reports whether a property with the given name exists. +func (pgs propertyGroups) hasProperty(name string) bool { + _, ok := pgs.propByName[name] + return ok +} + +// hasPropertyGroup reports whether an identical group exists. +func (pgs propertyGroups) hasPropertyGroup(group *PropertyGroup) bool { + for _, g := range pgs.groups { + if reflect.DeepEqual(g, group) { + return true + } + } + return false +} + +// propertyGroupNum returns the number of property groups. +func (pgs propertyGroups) propertyGroupNum() int { + return len(pgs.groups) +} + +// cardinality returns the cardinality of a named property. +func (pgs propertyGroups) cardinality(name string) (Cardinality, error) { + p, ok := pgs.propByName[name] + if !ok { + return "", fmt.Errorf("%w: property %s not found", ErrNotFound, name) + } + return p.Cardinality, nil +} + +// propertyType returns the DataType of a named property. +func (pgs propertyGroups) propertyType(name string) (DataType, error) { + p, ok := pgs.propByName[name] + if !ok { + return "", fmt.Errorf("%w: property %s not found", ErrNotFound, name) + } + return p.DataType, nil +} + +// isPrimaryKey reports whether the given property is a primary key. +func (pgs propertyGroups) isPrimaryKey(name string) (bool, error) { + p, ok := pgs.propByName[name] + if !ok { + return false, fmt.Errorf("%w: property %s not found", ErrNotFound, name) + } + return p.Primary, nil +} + +// isNullableKey reports whether the given property is nullable. +func (pgs propertyGroups) isNullableKey(name string) (bool, error) { + p, ok := pgs.propByName[name] + if !ok { + return false, fmt.Errorf("%w: property %s not found", ErrNotFound, name) + } + return p.Nullable, nil +} + +// propertyGroupList returns all property groups. +func (pgs propertyGroups) propertyGroupList() []*PropertyGroup { + out := make([]*PropertyGroup, len(pgs.groups)) + copy(out, pgs.groups) + return out +} + +// propertyGroupFor returns the group that contains the given property. +func (pgs propertyGroups) propertyGroupFor(name string) (*PropertyGroup, error) { + g, ok := pgs.groupByKey[name] + if !ok { + return nil, fmt.Errorf("%w: property group for %s not found", ErrNotFound, name) + } + return g, nil +} diff --git a/go/graphar/info/property_test.go b/go/graphar/info/property_test.go new file mode 100644 index 000000000..5a39e3bbc --- /dev/null +++ b/go/graphar/info/property_test.go @@ -0,0 +1,138 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +import "testing" + +func TestPropertyGroups(t *testing.T) { + p1 := &Property{Name: "id", DataType: DataTypeInt64, Primary: true, Nullable: false} + p2 := &Property{Name: "name", DataType: DataTypeString, Primary: false, Nullable: true} + pg1 := &PropertyGroup{ + Properties: []*Property{p1, p2}, + FileType: FileTypeParquet, + Prefix: "p1/", + } + + p3 := &Property{Name: "age", DataType: DataTypeInt32, Primary: false, Nullable: true} + pg2 := &PropertyGroup{ + Properties: []*Property{p3}, + FileType: FileTypeCSV, + Prefix: "p2/", + } + + groups := newPropertyGroups([]*PropertyGroup{pg1, pg2}) + + t.Run("HasProperty", func(t *testing.T) { + if !groups.hasProperty("id") { + t.Errorf("expected to have property id") + } + if !groups.hasProperty("age") { + t.Errorf("expected to have property age") + } + if groups.hasProperty("nonexistent") { + t.Errorf("did not expect to have nonexistent property") + } + }) + + t.Run("PropertyGroupNum", func(t *testing.T) { + if groups.propertyGroupNum() != 2 { + t.Errorf("expected 2 groups, got %d", groups.propertyGroupNum()) + } + }) + + t.Run("PropertyDetails", func(t *testing.T) { + pType, err := groups.propertyType("id") + if err != nil || pType != DataTypeInt64 { + t.Errorf("expected DataTypeInt64 for id, got %v, err: %v", pType, err) + } + + card, err := groups.cardinality("id") + if err != nil || card != "" { + t.Errorf("expected empty cardinality for id, got %v, err: %v", card, err) + } + + isPrimary, err := groups.isPrimaryKey("id") + if err != nil || !isPrimary { + t.Errorf("expected id to be primary key, got %v, err: %v", isPrimary, err) + } + + isNullable, err := groups.isNullableKey("id") + if err != nil || isNullable { + t.Errorf("expected id to be non-nullable, got %v, err: %v", isNullable, err) + } + + isPrimaryName, err := groups.isPrimaryKey("name") + if err != nil || isPrimaryName { + t.Errorf("expected name to not be primary key, got %v, err: %v", isPrimaryName, err) + } + + isNullableName, err := groups.isNullableKey("name") + if err != nil || !isNullableName { + t.Errorf("expected name to be nullable, got %v, err: %v", isNullableName, err) + } + + pAge := &Property{Name: "age", DataType: DataTypeInt32, Cardinality: CardinalitySingle} + pgAge := newPropertyGroups([]*PropertyGroup{{Properties: []*Property{pAge}, FileType: FileTypeParquet}}) + cardAge, err := pgAge.cardinality("age") + if err != nil || cardAge != CardinalitySingle { + t.Errorf("expected CardinalitySingle for age, got %v, err: %v", cardAge, err) + } + }) + + t.Run("PropertyGroupList", func(t *testing.T) { + list := groups.propertyGroupList() + if len(list) != 2 { + t.Errorf("expected 2 groups in list") + } + }) + + t.Run("PropertyGroupFor", func(t *testing.T) { + g, err := groups.propertyGroupFor("id") + if err != nil || g.Prefix != "p1/" { + t.Errorf("expected group with prefix p1/ for id, err: %v", err) + } + + g2, err := groups.propertyGroupFor("age") + if err != nil || g2.Prefix != "p2/" { + t.Errorf("expected group with prefix p2/ for age, err: %v", err) + } + + _, err = groups.propertyGroupFor("nonexistent") + if err == nil { + t.Errorf("did not expect group for nonexistent property") + } + }) + + t.Run("HasPropertyGroup", func(t *testing.T) { + if !groups.hasPropertyGroup(pg1) { + t.Errorf("expected to find existing property group pg1") + } + + otherPg := &PropertyGroup{Prefix: "other/", FileType: FileTypeCSV} + if groups.hasPropertyGroup(otherPg) { + t.Errorf("did not expect to find other property group") + } + }) + + t.Run("ErrorCases", func(t *testing.T) { + _, err := groups.propertyType("nonexistent") + if err == nil { + t.Errorf("expected error for nonexistent property") + } + }) +} diff --git a/go/graphar/info/types.go b/go/graphar/info/types.go new file mode 100644 index 000000000..0edf92c99 --- /dev/null +++ b/go/graphar/info/types.go @@ -0,0 +1,133 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +// DataType describes the logical type of a property value. +// The set of values mirrors the Java/C++ DataType enums. +type DataType string + +const ( + DataTypeBool DataType = "bool" + DataTypeInt32 DataType = "int32" + DataTypeInt64 DataType = "int64" + DataTypeFloat DataType = "float" + DataTypeDouble DataType = "double" + DataTypeString DataType = "string" + DataTypeList DataType = "list" + DataTypeDate DataType = "date" + DataTypeTimestamp DataType = "timestamp" +) + +// IsValid checks if the DataType is supported by GraphAr. +func (t DataType) IsValid() bool { + switch t { + case DataTypeBool, DataTypeInt32, DataTypeInt64, DataTypeFloat, DataTypeDouble, + DataTypeString, DataTypeList, DataTypeDate, DataTypeTimestamp: + return true + default: + return false + } +} + +// FileType describes the file format of the data. +type FileType string + +const ( + FileTypeCSV FileType = "csv" + FileTypeParquet FileType = "parquet" + FileTypeORC FileType = "orc" +) + +// IsValid checks if the FileType is supported by GraphAr. +func (t FileType) IsValid() bool { + switch t { + case FileTypeCSV, FileTypeParquet, FileTypeORC: + return true + default: + return false + } +} + +// Cardinality defines how multiple values are handled for a given property key. +type Cardinality string + +const ( + CardinalitySingle Cardinality = "single" + CardinalityList Cardinality = "list" + CardinalitySet Cardinality = "set" +) + +// IsValid checks if the Cardinality is supported by GraphAr. +func (c Cardinality) IsValid() bool { + switch c { + case CardinalitySingle, CardinalityList, CardinalitySet: + return true + default: + return false + } +} + +// AdjListType mirrors the Java AdjListType enum. +type AdjListType string + +const ( + AdjListUnorderedBySource AdjListType = "unordered_by_source" + AdjListUnorderedByDest AdjListType = "unordered_by_dest" + AdjListOrderedBySource AdjListType = "ordered_by_source" + AdjListOrderedByDest AdjListType = "ordered_by_dest" +) + +// IsValid checks if the AdjListType is supported by GraphAr. +func (t AdjListType) IsValid() bool { + switch t { + case AdjListUnorderedBySource, AdjListUnorderedByDest, + AdjListOrderedBySource, AdjListOrderedByDest: + return true + default: + return false + } +} + +// regularSeparator is the regular separator used when concatenating labels/types, +// consistent with GeneralParams.regularSeparator in Java/C++. +const regularSeparator = "__" + +const ( + // pathAdjList is the directory name for adjacent list data. + pathAdjList = "adj_list/" + // pathOffset is the directory name for offset data. + pathOffset = "offset/" + // fileVertexCount is the filename for vertex count metadata. + fileVertexCount = "vertex_count" + // fileEdgeCountPrefix is the prefix for edge count metadata files. + fileEdgeCountPrefix = "edge_count" + // chunkPrefix is the prefix for chunk data files. + chunkPrefix = "chunk" +) + +const ( + // alignedBySrc indicates data is aligned by source vertex. + alignedBySrc = "src" + // alignedByDst indicates data is aligned by destination vertex. + alignedByDst = "dst" +) + +const ( + // versionGarV1 is the legacy version string for GraphAr v1. + versionGarV1 = "gar/v1" +) diff --git a/go/graphar/info/version.go b/go/graphar/info/version.go new file mode 100644 index 000000000..336929904 --- /dev/null +++ b/go/graphar/info/version.go @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +import ( + "fmt" + "strconv" + "strings" +) + +const ( + // maxSupportedMajor is the maximum major version of the GraphAr metadata supported by this SDK. + maxSupportedMajor = 1 + // maxSupportedMinor is the maximum minor version of the GraphAr metadata supported by this SDK. + maxSupportedMinor = 0 +) + +// VersionInfo represents a semantic version of the GraphAr metadata. +type VersionInfo struct { + Major int `yaml:"major"` + Minor int `yaml:"minor"` + Patch int `yaml:"patch"` +} + +// isSupported checks if the metadata version is supported by this SDK. +func (v VersionInfo) isSupported() bool { + if v.Major > maxSupportedMajor { + return false + } + if v.Major == maxSupportedMajor && v.Minor > maxSupportedMinor { + return false + } + return true +} + +// parseVersion parses a version string like "0.1.0" or "gar/v1" into VersionInfo. +func parseVersion(s string) (VersionInfo, error) { + if s == versionGarV1 { + return VersionInfo{Major: 1, Minor: 0, Patch: 0}, nil + } + parts := strings.Split(s, ".") + if len(parts) != 3 { + return VersionInfo{}, fmt.Errorf("%w: invalid version %q", ErrParse, s) + } + ints := make([]int, 3) + for i, p := range parts { + v, err := strconv.Atoi(p) + if err != nil { + return VersionInfo{}, fmt.Errorf("%w: invalid version component %q: %v", ErrParse, p, err) + } + ints[i] = v + } + return VersionInfo{Major: ints[0], Minor: ints[1], Patch: ints[2]}, nil +} + +func (v VersionInfo) String() string { + return fmt.Sprintf("%d.%d.%d", v.Major, v.Minor, v.Patch) +} + +// UnmarshalYAML implements the yaml.Unmarshaler interface to support parsing from string. +func (v *VersionInfo) UnmarshalYAML(unmarshal func(interface{}) error) error { + var s string + if err := unmarshal(&s); err != nil { + // Try to unmarshal as struct if string fails + type rawVersion VersionInfo + return unmarshal((*rawVersion)(v)) + } + parsed, err := parseVersion(s) + if err != nil { + return err + } + *v = parsed + return nil +} diff --git a/go/graphar/info/version_test.go b/go/graphar/info/version_test.go new file mode 100644 index 000000000..9f0aad89f --- /dev/null +++ b/go/graphar/info/version_test.go @@ -0,0 +1,104 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +import ( + "testing" + + "gopkg.in/yaml.v3" +) + +func TestParseVersion(t *testing.T) { + tests := []struct { + input string + expected VersionInfo + wantErr bool + }{ + {"0.1.0", VersionInfo{0, 1, 0}, false}, + {"1.2.3", VersionInfo{1, 2, 3}, false}, + {"gar/v1", VersionInfo{1, 0, 0}, false}, + {"1.0", VersionInfo{}, true}, + {"1.2.3.4", VersionInfo{}, true}, + {"a.b.c", VersionInfo{}, true}, + {"1.a.0", VersionInfo{}, true}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got, err := parseVersion(tt.input) + if (err != nil) != tt.wantErr { + t.Errorf("ParseVersion() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !tt.wantErr && got != tt.expected { + t.Errorf("ParseVersion() got = %v, want %v", got, tt.expected) + } + }) + } +} + +func TestVersionUnmarshalYAML(t *testing.T) { + t.Run("string version", func(t *testing.T) { + var v VersionInfo + err := yaml.Unmarshal([]byte("0.1.0"), &v) + if err != nil { + t.Fatalf("failed to unmarshal: %v", err) + } + if v.Major != 0 || v.Minor != 1 || v.Patch != 0 { + t.Errorf("expected 0.1.0, got %v", v) + } + }) + + t.Run("struct version", func(t *testing.T) { + var v VersionInfo + err := yaml.Unmarshal([]byte("major: 1\nminor: 2\npatch: 3"), &v) + if err != nil { + t.Fatalf("failed to unmarshal: %v", err) + } + if v.Major != 1 || v.Minor != 2 || v.Patch != 3 { + t.Errorf("expected 1.2.3, got %v", v) + } + }) + + t.Run("gar/v1 version", func(t *testing.T) { + var v VersionInfo + err := yaml.Unmarshal([]byte("gar/v1"), &v) + if err != nil { + t.Fatalf("failed to unmarshal: %v", err) + } + if v.Major != 1 || v.Minor != 0 || v.Patch != 0 { + t.Errorf("expected 1.0.0 for gar/v1, got %v", v) + } + }) + + t.Run("invalid version", func(t *testing.T) { + var v VersionInfo + err := yaml.Unmarshal([]byte("invalid.version"), &v) + if err == nil { + t.Errorf("expected error for invalid version string") + } + }) +} + +func TestVersionString(t *testing.T) { + v := VersionInfo{Major: 1, Minor: 2, Patch: 3} + expected := "1.2.3" + if got := v.String(); got != expected { + t.Errorf("VersionInfo.String() = %v, want %v", got, expected) + } +} diff --git a/go/graphar/info/vertex_info.go b/go/graphar/info/vertex_info.go new file mode 100644 index 000000000..f3dc2bca4 --- /dev/null +++ b/go/graphar/info/vertex_info.go @@ -0,0 +1,147 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +import ( + "fmt" + "net/url" + "strings" +) + +// VertexInfo describes metadata for a vertex type. +type VertexInfo struct { + Type string `yaml:"type"` + ChunkSize int64 `yaml:"chunk_size"` + PropertyGroups []*PropertyGroup `yaml:"property_groups"` + Labels []string `yaml:"labels,omitempty"` + BaseURI *url.URL `yaml:"-"` + Prefix string `yaml:"prefix"` // serialized form of BaseURI + Version VersionInfo `yaml:"version"` + + pgroups propertyGroups `yaml:"-"` +} + +func (v *VertexInfo) GetPrefix() *url.URL { + return v.BaseURI +} + +func (v *VertexInfo) HasProperty(propertyName string) bool { + return v.pgroups.hasProperty(propertyName) +} + +func (v *VertexInfo) IsPrimaryKey(propertyName string) (bool, error) { + return v.pgroups.isPrimaryKey(propertyName) +} + +func (v *VertexInfo) IsNullableKey(propertyName string) (bool, error) { + return v.pgroups.isNullableKey(propertyName) +} + +func (v *VertexInfo) GetPropertyType(propertyName string) (DataType, error) { + return v.pgroups.propertyType(propertyName) +} + +func (v *VertexInfo) GetPropertyCardinality(propertyName string) (Cardinality, error) { + return v.pgroups.cardinality(propertyName) +} + +func (v *VertexInfo) GetPropertyGroup(propertyName string) (*PropertyGroup, error) { + return v.pgroups.propertyGroupFor(propertyName) +} + +func (v *VertexInfo) GetPropertyGroupURI(pg *PropertyGroup) (*url.URL, error) { + base := v.GetPrefix() + if base == nil { + return nil, fmt.Errorf("%w: vertex base uri is nil", ErrInvalid) + } + rel := pg.GetPrefix() + if rel == nil { + return nil, fmt.Errorf("%w: property group prefix is nil", ErrInvalid) + } + return base.ResolveReference(rel), nil +} + +func (v *VertexInfo) GetPropertyGroupChunkURI(pg *PropertyGroup, chunkIndex int64) (*url.URL, error) { + uri, err := v.GetPropertyGroupURI(pg) + if err != nil { + return nil, err + } + return uri.JoinPath(fmt.Sprintf("%s%d", chunkPrefix, chunkIndex)), nil +} + +func (v *VertexInfo) GetVerticesNumFileURI() (*url.URL, error) { + base := v.GetPrefix() + if base == nil { + return nil, fmt.Errorf("%w: vertex base uri is nil", ErrInvalid) + } + return base.JoinPath(fileVertexCount), nil +} + +// Init builds internal indices after unmarshalling or manual construction. +func (v *VertexInfo) Init() error { + if v.Prefix != "" && v.BaseURI == nil { + p := v.Prefix + if !strings.HasSuffix(p, "/") { + p += "/" + } + u, err := url.Parse(p) + if err != nil { + return fmt.Errorf("%w: invalid vertex base uri %q: %v", ErrInvalid, v.Prefix, err) + } + v.BaseURI = u + } + v.pgroups = newPropertyGroups(v.PropertyGroups) + for _, g := range v.PropertyGroups { + if err := g.Init(); err != nil { + return err + } + } + return nil +} + +// Validate checks that the VertexInfo is structurally valid. +func (v *VertexInfo) Validate() error { + if v.Type == "" { + return fmt.Errorf("%w: vertex type is empty", ErrMissingField) + } + if v.ChunkSize <= 0 { + return fmt.Errorf("%w: vertex %s has non-positive chunk_size %d", ErrInvalid, v.Type, v.ChunkSize) + } + if v.BaseURI == nil && v.Prefix == "" { + return fmt.Errorf("%w: vertex base uri/prefix is empty", ErrMissingField) + } + // property groups non-empty and unique property names + seen := make(map[string]struct{}) + for _, g := range v.PropertyGroups { + if err := g.Validate(); err != nil { + return fmt.Errorf("%w: vertex %s property group: %v", ErrInvalid, v.Type, err) + } + for _, p := range g.Properties { + if _, ok := seen[p.Name]; ok { + return fmt.Errorf("%w: vertex %s has duplicated property %q", ErrDuplicate, v.Type, p.Name) + } + seen[p.Name] = struct{}{} + } + } + return nil +} + +// PropertyGroupFor returns the property group containing the given property. +func (v *VertexInfo) PropertyGroupFor(name string) (*PropertyGroup, error) { + return v.pgroups.propertyGroupFor(name) +} diff --git a/go/graphar/info/vertex_info_test.go b/go/graphar/info/vertex_info_test.go new file mode 100644 index 000000000..fa467d3c2 --- /dev/null +++ b/go/graphar/info/vertex_info_test.go @@ -0,0 +1,113 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +import "testing" + +func TestVertexInfo(t *testing.T) { + v := &VertexInfo{ + Type: "person", + ChunkSize: 100, + Prefix: "person/", + PropertyGroups: []*PropertyGroup{ + { + FileType: FileTypeParquet, + Properties: []*Property{ + {Name: "id", DataType: DataTypeInt64, Primary: true}, + }, + }, + }, + Version: VersionInfo{Major: 0, Minor: 1, Patch: 0}, + } + + t.Run("init", func(t *testing.T) { + if err := v.Init(); err != nil { + t.Fatalf("init failed: %v", err) + } + if !v.HasProperty("id") { + t.Errorf("expected to have property id after init") + } + }) + + t.Run("validate", func(t *testing.T) { + if err := v.Validate(); err != nil { + t.Errorf("validate failed: %v", err) + } + + // Type empty + vErr := *v + vErr.Type = "" + if err := vErr.Validate(); err == nil { + t.Error("expected error for empty type") + } + + // ChunkSize <= 0 + vErr = *v + vErr.ChunkSize = 0 + if err := vErr.Validate(); err == nil { + t.Error("expected error for zero chunk_size") + } + + // Prefix empty + vErr = *v + vErr.Prefix = "" + vErr.BaseURI = nil + if err := vErr.Validate(); err == nil { + t.Error("expected error for empty prefix/base_uri") + } + + // Duplicated property name + vErr = *v + vErr.PropertyGroups = []*PropertyGroup{ + { + FileType: FileTypeParquet, + Properties: []*Property{ + {Name: "id", DataType: DataTypeInt64}, + {Name: "id", DataType: DataTypeInt64}, + }, + }, + } + if err := vErr.Validate(); err == nil { + t.Error("expected error for duplicated property name") + } + + // Empty property name + vErr = *v + vErr.PropertyGroups = []*PropertyGroup{ + { + FileType: FileTypeParquet, + Properties: []*Property{ + {Name: "", DataType: DataTypeInt64}, + }, + }, + } + if err := vErr.Validate(); err == nil { + t.Error("expected error for empty property name") + } + }) + + t.Run("GetPropertyGroup", func(t *testing.T) { + pg, err := v.GetPropertyGroup("id") + if err != nil { + t.Fatalf("GetPropertyGroup failed: %v", err) + } + if pg.FileType != FileTypeParquet { + t.Errorf("expected parquet group") + } + }) +} diff --git a/go/graphar/info/yaml_edge.go b/go/graphar/info/yaml_edge.go new file mode 100644 index 000000000..be93b0c3e --- /dev/null +++ b/go/graphar/info/yaml_edge.go @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +import ( + "bytes" + "net/url" + "os" + + internalyaml "github.com/apache/incubator-graphar/go/graphar/internal/yaml" +) + +// LoadEdgeInfo loads a single edge info YAML file from local filesystem. +func LoadEdgeInfo(path string) (*EdgeInfo, error) { + b, err := os.ReadFile(path) + if err != nil { + return nil, err + } + return LoadEdgeInfoFromBytes(b) +} + +// LoadEdgeInfoFromBytes loads a single edge info from bytes. +func LoadEdgeInfoFromBytes(b []byte) (*EdgeInfo, error) { + var e EdgeInfo + if err := internalyaml.DecodeEdge(bytes.NewReader(b), &e); err != nil { + return nil, err + } + + if e.Prefix != "" { + u, err := url.Parse(e.Prefix) + if err != nil { + return nil, err + } + e.BaseURI = u + } + if err := e.Init(); err != nil { + return nil, err + } + if err := e.Validate(); err != nil { + return nil, err + } + return &e, nil +} diff --git a/go/graphar/info/yaml_graph.go b/go/graphar/info/yaml_graph.go new file mode 100644 index 000000000..2c6a9604e --- /dev/null +++ b/go/graphar/info/yaml_graph.go @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +import ( + "bytes" + "fmt" + "io" + "net/url" + "os" + "path/filepath" + + internalyaml "github.com/apache/incubator-graphar/go/graphar/internal/yaml" +) + +// Loader defines a function type to load file content from a given path. +type Loader func(path string) ([]byte, error) + +// LoadGraphInfo reads a graph info YAML file and all referenced vertex/edge info files from local filesystem. +func LoadGraphInfo(path string) (*GraphInfo, error) { + b, err := os.ReadFile(path) + if err != nil { + return nil, err + } + + l := func(p string) ([]byte, error) { + return os.ReadFile(p) + } + + return LoadGraphInfoFromBytes(b, path, l) +} + +// LoadGraphInfoFromBytes loads GraphInfo from bytes and uses the loader to load referenced info files. +func LoadGraphInfoFromBytes(b []byte, path string, l Loader) (*GraphInfo, error) { + baseDir := filepath.Dir(path) + // Ensure baseURI has a trailing slash for proper relative resolution + baseURIStr := baseDir + if baseURIStr != "" && !os.IsPathSeparator(baseURIStr[len(baseURIStr)-1]) { + baseURIStr += string(os.PathSeparator) + } + baseURI, _ := url.Parse(baseURIStr) + + return parseGraphInfoYAML(bytes.NewReader(b), baseURI, baseDir, l) +} + +// parseGraphInfoYAML parses a graph-level YAML from r, resolving referenced +// vertex/edge info files relative to baseDir/baseURI using the provided loader. +func parseGraphInfoYAML(r io.Reader, baseURI *url.URL, baseDir string, l Loader) (*GraphInfo, error) { + gy, err := internalyaml.DecodeGraph(r) + if err != nil { + return nil, fmt.Errorf("%w: decode graph yaml: %v", ErrParse, err) + } + + if gy.Version == "" { + return nil, fmt.Errorf("%w: graph yaml missing version", ErrMissingField) + } + v, err := parseVersion(gy.Version) + if err != nil { + return nil, fmt.Errorf("%w: parse graph version: %v", ErrParse, err) + } + if !v.isSupported() { + return nil, fmt.Errorf("%w: graph info version %s is not supported", ErrUnsupported, v.String()) + } + + g := &GraphInfo{ + Name: gy.Name, + Prefix: gy.Prefix, + Version: v, + BaseURI: baseURI, + } + + for _, vpath := range gy.Vertices { + abs := filepath.Join(baseDir, vpath) + var vi *VertexInfo + if l != nil { + vb, err := l(abs) + if err != nil { + return nil, fmt.Errorf("%w: load vertex info %q: %v", ErrNotFound, vpath, err) + } + vi, err = LoadVertexInfoFromBytes(vb) + } else { + vi, err = LoadVertexInfo(abs) + } + if err != nil { + return nil, fmt.Errorf("%w: parse vertex info %q: %v", ErrParse, vpath, err) + } + g.VertexInfos = append(g.VertexInfos, vi) + } + + for _, epath := range gy.Edges { + abs := filepath.Join(baseDir, epath) + var ei *EdgeInfo + if l != nil { + eb, err := l(abs) + if err != nil { + return nil, fmt.Errorf("%w: load edge info %q: %v", ErrNotFound, epath, err) + } + ei, err = LoadEdgeInfoFromBytes(eb) + } else { + ei, err = LoadEdgeInfo(abs) + } + if err != nil { + return nil, fmt.Errorf("%w: parse edge info %q: %v", ErrParse, epath, err) + } + g.EdgeInfos = append(g.EdgeInfos, ei) + } + + if err := g.Init(); err != nil { + return nil, err + } + if err := g.Validate(); err != nil { + return nil, err + } + return g, nil +} diff --git a/go/graphar/info/yaml_test.go b/go/graphar/info/yaml_test.go new file mode 100644 index 000000000..646ba0a0b --- /dev/null +++ b/go/graphar/info/yaml_test.go @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +import ( + "errors" + "net/url" + "strings" + "testing" +) + +func TestParseGraphInfoYAML(t *testing.T) { + yamlStr := ` +name: test_graph +prefix: /tmp/graphar/ +vertices: [] +edges: [] +version: 0.1.0 +` + u, _ := url.Parse("/tmp/") + _, err := parseGraphInfoYAML(strings.NewReader(yamlStr), u, "/tmp/", nil) + if err != nil && !errors.Is(err, ErrUnsupported) { + t.Errorf("expected ErrUnsupported for version 2.0.0, got %v", err) + } +} + +func TestSentinelErrors(t *testing.T) { + t.Run("MissingField", func(t *testing.T) { + yamlStr := `name: test` // missing version and prefix + u, _ := url.Parse("/tmp/") + _, err := parseGraphInfoYAML(strings.NewReader(yamlStr), u, "/tmp/", nil) + if !errors.Is(err, ErrMissingField) { + t.Errorf("expected ErrMissingField, got %v", err) + } + }) + + t.Run("NotFound", func(t *testing.T) { + yamlStr := ` +name: test +prefix: /tmp/ +version: 0.1.0 +vertices: [non-existent.vertex.yaml] +` + loader := func(path string) ([]byte, error) { + return nil, ErrNotFound + } + u, _ := url.Parse("/tmp/") + _, err := parseGraphInfoYAML(strings.NewReader(yamlStr), u, "/tmp/", loader) + if !errors.Is(err, ErrNotFound) { + t.Errorf("expected ErrNotFound for missing vertex info, got %v", err) + } + }) + + t.Run("Invalid", func(t *testing.T) { + vi := &VertexInfo{Type: "person", ChunkSize: -1} + err := vi.Validate() + if !errors.Is(err, ErrInvalid) { + t.Errorf("expected ErrInvalid for negative chunk size, got %v", err) + } + }) +} + +func TestParseInvalidYAML(t *testing.T) { + invalidYaml := ` +name: : invalid +` + u, _ := url.Parse("/tmp/") + _, err := parseGraphInfoYAML(strings.NewReader(invalidYaml), u, "/tmp/", nil) + if err == nil { + t.Errorf("expected error for invalid YAML") + } +} diff --git a/go/graphar/info/yaml_vertex.go b/go/graphar/info/yaml_vertex.go new file mode 100644 index 000000000..05cdda743 --- /dev/null +++ b/go/graphar/info/yaml_vertex.go @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package info + +import ( + "bytes" + "net/url" + "os" + + internalyaml "github.com/apache/incubator-graphar/go/graphar/internal/yaml" +) + +// LoadVertexInfo loads a single vertex info YAML file from local filesystem. +func LoadVertexInfo(path string) (*VertexInfo, error) { + b, err := os.ReadFile(path) + if err != nil { + return nil, err + } + return LoadVertexInfoFromBytes(b) +} + +// LoadVertexInfoFromBytes loads a single vertex info from bytes. +func LoadVertexInfoFromBytes(b []byte) (*VertexInfo, error) { + var v VertexInfo + if err := internalyaml.DecodeVertex(bytes.NewReader(b), &v); err != nil { + return nil, err + } + + // Prefix is serialized string form of BaseURI. + if v.Prefix != "" { + u, err := url.Parse(v.Prefix) + if err != nil { + return nil, err + } + v.BaseURI = u + } + if err := v.Init(); err != nil { + return nil, err + } + if err := v.Validate(); err != nil { + return nil, err + } + return &v, nil +} diff --git a/go/graphar/internal/yaml/decode.go b/go/graphar/internal/yaml/decode.go new file mode 100644 index 000000000..c98f2fb34 --- /dev/null +++ b/go/graphar/internal/yaml/decode.go @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package yaml + +import ( + "fmt" + "io" + + "gopkg.in/yaml.v3" +) + +// GraphSchema mirrors the on-disk YAML representation of a graph info file. +type GraphSchema struct { + Name string `yaml:"name"` + Prefix string `yaml:"prefix"` + Vertices []string `yaml:"vertices"` + Edges []string `yaml:"edges"` + Version string `yaml:"version"` +} + +// DecodeGraph decodes a graph info YAML from r. +func DecodeGraph(r io.Reader) (*GraphSchema, error) { + var g GraphSchema + if err := yaml.NewDecoder(r).Decode(&g); err != nil { + return nil, fmt.Errorf("decode graph yaml: %w", err) + } + return &g, nil +} + +// DecodeVertex decodes a vertex info YAML from r into v. +func DecodeVertex(r io.Reader, v interface{}) error { + if err := yaml.NewDecoder(r).Decode(v); err != nil { + return fmt.Errorf("decode vertex info: %w", err) + } + return nil +} + +// DecodeEdge decodes an edge info YAML from r into e. +func DecodeEdge(r io.Reader, e interface{}) error { + if err := yaml.NewDecoder(r).Decode(e); err != nil { + return fmt.Errorf("decode edge info: %w", err) + } + return nil +} diff --git a/go/graphar/internal/yaml/yaml_test.go b/go/graphar/internal/yaml/yaml_test.go new file mode 100644 index 000000000..cd0aae53a --- /dev/null +++ b/go/graphar/internal/yaml/yaml_test.go @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package yaml + +import ( + "strings" + "testing" +) + +func TestDecodeGraph(t *testing.T) { + yamlStr := ` +name: test_graph +prefix: /tmp/ +vertices: + - v1.yml +edges: + - e1.yml +version: 0.1.0 +` + g, err := DecodeGraph(strings.NewReader(yamlStr)) + if err != nil { + t.Fatalf("DecodeGraph failed: %v", err) + } + if g.Name != "test_graph" || len(g.Vertices) != 1 { + t.Errorf("unexpected graph data: %+v", g) + } +} + +func TestDecodeVertex(t *testing.T) { + yamlStr := "type: person\nchunk_size: 100" + var v struct { + Type string `yaml:"type"` + ChunkSize int `yaml:"chunk_size"` + } + err := DecodeVertex(strings.NewReader(yamlStr), &v) + if err != nil { + t.Fatalf("DecodeVertex failed: %v", err) + } + if v.Type != "person" || v.ChunkSize != 100 { + t.Errorf("unexpected vertex data: %+v", v) + } +} + +func TestDecodeEdge(t *testing.T) { + yamlStr := "edge_type: knows\ndirected: true" + var e struct { + EdgeType string `yaml:"edge_type"` + Directed bool `yaml:"directed"` + } + err := DecodeEdge(strings.NewReader(yamlStr), &e) + if err != nil { + t.Fatalf("DecodeEdge failed: %v", err) + } + if e.EdgeType != "knows" || !e.Directed { + t.Errorf("unexpected edge data: %+v", e) + } +} diff --git a/licenserc.toml b/licenserc.toml index 33d89581c..460a0fe87 100644 --- a/licenserc.toml +++ b/licenserc.toml @@ -64,3 +64,7 @@ excludes = [ "cli/*.yml", "cli/*.toml", ] + +[mapping.DOUBLESLASH_STYLE] +extensions = ['go'] +filenames = ['go.mod']