From a76f3185b291e513b5800b6c331208290380f91c Mon Sep 17 00:00:00 2001 From: Nadav0077 <18245584+Nadav0077@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:16:29 +0300 Subject: [PATCH 1/2] Bound ReadSymlink blob reads to PATH_MAX Git stores a symlink target as the raw blob body, so the size of the target is controlled by the repository. ReadSymlink was calling os.ReadFile on the hydrated blob with no cap, which means a single readlink(2) against a hostile repo could drive an arbitrarily large allocation (and on blobless clones, a large lazy fetch). Read through an io.LimitReader bounded at 4096 bytes (Linux PATH_MAX) and return ENAMETOOLONG past that. Tests cover the empty, short, at-limit, over-limit, far-over-limit, and missing-cache paths. --- internal/fusefs/fuse_unix.go | 33 ++++++++- internal/fusefs/readsymlink_unix_test.go | 93 ++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 internal/fusefs/readsymlink_unix_test.go diff --git a/internal/fusefs/fuse_unix.go b/internal/fusefs/fuse_unix.go index 3a1108e..93e25af 100644 --- a/internal/fusefs/fuse_unix.go +++ b/internal/fusefs/fuse_unix.go @@ -6,6 +6,7 @@ import ( "context" "errors" "fmt" + "io" iofs "io/fs" "os" "path/filepath" @@ -469,6 +470,31 @@ func (fs *ArtifactFuse) Rename(ctx context.Context, op *fuseops.RenameOp) error return nil } +// maxSymlinkTargetBytes caps the size of a symlink target we'll hand back to +// the kernel. Linux PATH_MAX is 4096, which is the largest target a real +// symlink can point at anyway. +const maxSymlinkTargetBytes = 4096 + +// readSymlinkTarget reads a cached Git blob and returns its contents as a +// symlink target. Git stores the target as the raw blob body, so nothing +// stops a repo from parking a very large payload behind a mode 120000 entry. +// We read with a bounded LimitReader and reject anything past PATH_MAX. +func readSymlinkTarget(cachePath string) (string, error) { + f, err := os.Open(cachePath) + if err != nil { + return "", err + } + defer f.Close() + data, err := io.ReadAll(io.LimitReader(f, maxSymlinkTargetBytes+1)) + if err != nil { + return "", err + } + if len(data) > maxSymlinkTargetBytes { + return "", syscall.ENAMETOOLONG + } + return string(data), nil +} + func (fs *ArtifactFuse) ReadSymlink(ctx context.Context, op *fuseops.ReadSymlinkOp) error { ref, err := fs.requireInode(op.Inode, syscall.ESTALE) if err != nil { @@ -483,11 +509,14 @@ func (fs *ArtifactFuse) ReadSymlink(ctx context.Context, op *fuseops.ReadSymlink if err != nil { return syscall.EIO } - data, err := os.ReadFile(cachePath) + target, err := readSymlinkTarget(cachePath) if err != nil { + if errors.Is(err, syscall.ENAMETOOLONG) { + return syscall.ENAMETOOLONG + } return syscall.EIO } - op.Target = string(data) + op.Target = target return nil } return syscall.ENOENT diff --git a/internal/fusefs/readsymlink_unix_test.go b/internal/fusefs/readsymlink_unix_test.go new file mode 100644 index 0000000..8527032 --- /dev/null +++ b/internal/fusefs/readsymlink_unix_test.go @@ -0,0 +1,93 @@ +//go:build !windows + +package fusefs + +import ( + "errors" + "os" + "path/filepath" + "strings" + "syscall" + "testing" +) + +func writeBlob(t *testing.T, dir, name string, data []byte) string { + t.Helper() + p := filepath.Join(dir, name) + if err := os.WriteFile(p, data, 0o644); err != nil { + t.Fatalf("write %s: %v", p, err) + } + return p +} + +func TestReadSymlinkTarget_EmptyTarget(t *testing.T) { + dir := t.TempDir() + p := writeBlob(t, dir, "empty", nil) + got, err := readSymlinkTarget(p) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != "" { + t.Fatalf("target = %q, want empty string", got) + } +} + +func TestReadSymlinkTarget_ShortTarget(t *testing.T) { + dir := t.TempDir() + p := writeBlob(t, dir, "short", []byte("../relative/path")) + got, err := readSymlinkTarget(p) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != "../relative/path" { + t.Fatalf("target = %q, want %q", got, "../relative/path") + } +} + +func TestReadSymlinkTarget_AtLimit(t *testing.T) { + dir := t.TempDir() + data := []byte(strings.Repeat("a", maxSymlinkTargetBytes)) + p := writeBlob(t, dir, "at-limit", data) + got, err := readSymlinkTarget(p) + if err != nil { + t.Fatalf("unexpected error at %d bytes: %v", maxSymlinkTargetBytes, err) + } + if len(got) != maxSymlinkTargetBytes { + t.Fatalf("target length = %d, want %d", len(got), maxSymlinkTargetBytes) + } +} + +func TestReadSymlinkTarget_OverLimit(t *testing.T) { + dir := t.TempDir() + data := []byte(strings.Repeat("a", maxSymlinkTargetBytes+1)) + p := writeBlob(t, dir, "over-limit", data) + _, err := readSymlinkTarget(p) + if !errors.Is(err, syscall.ENAMETOOLONG) { + t.Fatalf("err = %v, want ENAMETOOLONG", err) + } +} + +func TestReadSymlinkTarget_FarOverLimit(t *testing.T) { + // A blob that's orders of magnitude past PATH_MAX should still be read + // into a bounded slice and rejected, not slurped whole. + dir := t.TempDir() + data := make([]byte, 1<<20) // 1 MiB + for i := range data { + data[i] = 'x' + } + p := writeBlob(t, dir, "huge", data) + _, err := readSymlinkTarget(p) + if !errors.Is(err, syscall.ENAMETOOLONG) { + t.Fatalf("err = %v, want ENAMETOOLONG", err) + } +} + +func TestReadSymlinkTarget_MissingFile(t *testing.T) { + _, err := readSymlinkTarget(filepath.Join(t.TempDir(), "does-not-exist")) + if err == nil { + t.Fatal("expected error for missing cache file, got nil") + } + if errors.Is(err, syscall.ENAMETOOLONG) { + t.Fatalf("err = %v, want non-ENAMETOOLONG for missing file", err) + } +} From aa39d7c1918bafd2ea59e5521223bfbdb129c514 Mon Sep 17 00:00:00 2001 From: Matt Silverlock Date: Tue, 5 May 2026 11:32:04 -0400 Subject: [PATCH 2/2] reject oversized symlinks before hydration --- internal/fusefs/fuse_unix.go | 3 ++ internal/fusefs/readsymlink_unix_test.go | 56 ++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/internal/fusefs/fuse_unix.go b/internal/fusefs/fuse_unix.go index 93e25af..f830cba 100644 --- a/internal/fusefs/fuse_unix.go +++ b/internal/fusefs/fuse_unix.go @@ -505,6 +505,9 @@ func (fs *ArtifactFuse) ReadSymlink(ctx context.Context, op *fuseops.ReadSymlink return syscall.ENOENT } if n.Base.ObjectOID != "" { + if n.Base.SizeState == "known" && n.Base.SizeBytes > maxSymlinkTargetBytes { + return syscall.ENAMETOOLONG + } cachePath, _, err := fs.engine.Hydrator.EnsureHydrated(ctx, fs.repo, n.Base) if err != nil { return syscall.EIO diff --git a/internal/fusefs/readsymlink_unix_test.go b/internal/fusefs/readsymlink_unix_test.go index 8527032..5f5deb6 100644 --- a/internal/fusefs/readsymlink_unix_test.go +++ b/internal/fusefs/readsymlink_unix_test.go @@ -3,14 +3,34 @@ package fusefs import ( + "context" "errors" "os" "path/filepath" "strings" "syscall" "testing" + + "github.com/cloudflare/artifact-fs/internal/model" + "github.com/jacobsa/fuse/fuseops" ) +type fakeSymlinkHydrator struct { + calls int + cachePath string + size int64 + err error +} + +func (f *fakeSymlinkHydrator) Enqueue(model.HydrationTask) {} + +func (f *fakeSymlinkHydrator) EnsureHydrated(_ context.Context, _ model.RepoConfig, _ model.BaseNode) (string, int64, error) { + f.calls++ + return f.cachePath, f.size, f.err +} + +func (f *fakeSymlinkHydrator) QueueDepth(model.RepoID) int { return 0 } + func writeBlob(t *testing.T, dir, name string, data []byte) string { t.Helper() p := filepath.Join(dir, name) @@ -91,3 +111,39 @@ func TestReadSymlinkTarget_MissingFile(t *testing.T) { t.Fatalf("err = %v, want non-ENAMETOOLONG for missing file", err) } } + +func TestReadSymlinkRejectsKnownOversizedBlobBeforeHydration(t *testing.T) { + hydrator := &fakeSymlinkHydrator{} + repoID := model.RepoID("repo") + resolver := newResolver( + &fakeSnapshot{nodes: map[string]model.BaseNode{ + "link": { + RepoID: repoID, + Path: "link", + Type: "symlink", + Mode: 0o120000, + ObjectOID: "blob", + SizeState: "known", + SizeBytes: int64(maxSymlinkTargetBytes + 1), + }, + }}, + &fakeOverlay{entries: map[string]model.OverlayEntry{}}, + ) + fs := NewArtifactFuse(model.RepoConfig{ID: repoID}, resolver, &Engine{Hydrator: hydrator}) + + fs.mu.Lock() + ref := fs.allocInode("link", "symlink", 0o120000) + fs.mu.Unlock() + + op := &fuseops.ReadSymlinkOp{Inode: ref.ID} + err := fs.ReadSymlink(context.Background(), op) + if !errors.Is(err, syscall.ENAMETOOLONG) { + t.Fatalf("err = %v, want ENAMETOOLONG", err) + } + if hydrator.calls != 0 { + t.Fatalf("EnsureHydrated calls = %d, want 0", hydrator.calls) + } + if op.Target != "" { + t.Fatalf("target = %q, want empty", op.Target) + } +}