diff --git a/internal/fusefs/fuse_unix.go b/internal/fusefs/fuse_unix.go index 3a1108e..f830cba 100644 --- a/internal/fusefs/fuse_unix.go +++ b/internal/fusefs/fuse_unix.go @@ -6,6 +6,7 @@ import ( "context" "errors" "fmt" + "io" iofs "io/fs" "os" "path/filepath" @@ -469,6 +470,31 @@ func (fs *ArtifactFuse) Rename(ctx context.Context, op *fuseops.RenameOp) error return nil } +// maxSymlinkTargetBytes caps the size of a symlink target we'll hand back to +// the kernel. Linux PATH_MAX is 4096, which is the largest target a real +// symlink can point at anyway. +const maxSymlinkTargetBytes = 4096 + +// readSymlinkTarget reads a cached Git blob and returns its contents as a +// symlink target. Git stores the target as the raw blob body, so nothing +// stops a repo from parking a very large payload behind a mode 120000 entry. +// We read with a bounded LimitReader and reject anything past PATH_MAX. +func readSymlinkTarget(cachePath string) (string, error) { + f, err := os.Open(cachePath) + if err != nil { + return "", err + } + defer f.Close() + data, err := io.ReadAll(io.LimitReader(f, maxSymlinkTargetBytes+1)) + if err != nil { + return "", err + } + if len(data) > maxSymlinkTargetBytes { + return "", syscall.ENAMETOOLONG + } + return string(data), nil +} + func (fs *ArtifactFuse) ReadSymlink(ctx context.Context, op *fuseops.ReadSymlinkOp) error { ref, err := fs.requireInode(op.Inode, syscall.ESTALE) if err != nil { @@ -479,15 +505,21 @@ func (fs *ArtifactFuse) ReadSymlink(ctx context.Context, op *fuseops.ReadSymlink return syscall.ENOENT } if n.Base.ObjectOID != "" { + if n.Base.SizeState == "known" && n.Base.SizeBytes > maxSymlinkTargetBytes { + return syscall.ENAMETOOLONG + } cachePath, _, err := fs.engine.Hydrator.EnsureHydrated(ctx, fs.repo, n.Base) if err != nil { return syscall.EIO } - data, err := os.ReadFile(cachePath) + target, err := readSymlinkTarget(cachePath) if err != nil { + if errors.Is(err, syscall.ENAMETOOLONG) { + return syscall.ENAMETOOLONG + } return syscall.EIO } - op.Target = string(data) + op.Target = target return nil } return syscall.ENOENT diff --git a/internal/fusefs/readsymlink_unix_test.go b/internal/fusefs/readsymlink_unix_test.go new file mode 100644 index 0000000..5f5deb6 --- /dev/null +++ b/internal/fusefs/readsymlink_unix_test.go @@ -0,0 +1,149 @@ +//go:build !windows + +package fusefs + +import ( + "context" + "errors" + "os" + "path/filepath" + "strings" + "syscall" + "testing" + + "github.com/cloudflare/artifact-fs/internal/model" + "github.com/jacobsa/fuse/fuseops" +) + +type fakeSymlinkHydrator struct { + calls int + cachePath string + size int64 + err error +} + +func (f *fakeSymlinkHydrator) Enqueue(model.HydrationTask) {} + +func (f *fakeSymlinkHydrator) EnsureHydrated(_ context.Context, _ model.RepoConfig, _ model.BaseNode) (string, int64, error) { + f.calls++ + return f.cachePath, f.size, f.err +} + +func (f *fakeSymlinkHydrator) QueueDepth(model.RepoID) int { return 0 } + +func writeBlob(t *testing.T, dir, name string, data []byte) string { + t.Helper() + p := filepath.Join(dir, name) + if err := os.WriteFile(p, data, 0o644); err != nil { + t.Fatalf("write %s: %v", p, err) + } + return p +} + +func TestReadSymlinkTarget_EmptyTarget(t *testing.T) { + dir := t.TempDir() + p := writeBlob(t, dir, "empty", nil) + got, err := readSymlinkTarget(p) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != "" { + t.Fatalf("target = %q, want empty string", got) + } +} + +func TestReadSymlinkTarget_ShortTarget(t *testing.T) { + dir := t.TempDir() + p := writeBlob(t, dir, "short", []byte("../relative/path")) + got, err := readSymlinkTarget(p) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != "../relative/path" { + t.Fatalf("target = %q, want %q", got, "../relative/path") + } +} + +func TestReadSymlinkTarget_AtLimit(t *testing.T) { + dir := t.TempDir() + data := []byte(strings.Repeat("a", maxSymlinkTargetBytes)) + p := writeBlob(t, dir, "at-limit", data) + got, err := readSymlinkTarget(p) + if err != nil { + t.Fatalf("unexpected error at %d bytes: %v", maxSymlinkTargetBytes, err) + } + if len(got) != maxSymlinkTargetBytes { + t.Fatalf("target length = %d, want %d", len(got), maxSymlinkTargetBytes) + } +} + +func TestReadSymlinkTarget_OverLimit(t *testing.T) { + dir := t.TempDir() + data := []byte(strings.Repeat("a", maxSymlinkTargetBytes+1)) + p := writeBlob(t, dir, "over-limit", data) + _, err := readSymlinkTarget(p) + if !errors.Is(err, syscall.ENAMETOOLONG) { + t.Fatalf("err = %v, want ENAMETOOLONG", err) + } +} + +func TestReadSymlinkTarget_FarOverLimit(t *testing.T) { + // A blob that's orders of magnitude past PATH_MAX should still be read + // into a bounded slice and rejected, not slurped whole. + dir := t.TempDir() + data := make([]byte, 1<<20) // 1 MiB + for i := range data { + data[i] = 'x' + } + p := writeBlob(t, dir, "huge", data) + _, err := readSymlinkTarget(p) + if !errors.Is(err, syscall.ENAMETOOLONG) { + t.Fatalf("err = %v, want ENAMETOOLONG", err) + } +} + +func TestReadSymlinkTarget_MissingFile(t *testing.T) { + _, err := readSymlinkTarget(filepath.Join(t.TempDir(), "does-not-exist")) + if err == nil { + t.Fatal("expected error for missing cache file, got nil") + } + if errors.Is(err, syscall.ENAMETOOLONG) { + t.Fatalf("err = %v, want non-ENAMETOOLONG for missing file", err) + } +} + +func TestReadSymlinkRejectsKnownOversizedBlobBeforeHydration(t *testing.T) { + hydrator := &fakeSymlinkHydrator{} + repoID := model.RepoID("repo") + resolver := newResolver( + &fakeSnapshot{nodes: map[string]model.BaseNode{ + "link": { + RepoID: repoID, + Path: "link", + Type: "symlink", + Mode: 0o120000, + ObjectOID: "blob", + SizeState: "known", + SizeBytes: int64(maxSymlinkTargetBytes + 1), + }, + }}, + &fakeOverlay{entries: map[string]model.OverlayEntry{}}, + ) + fs := NewArtifactFuse(model.RepoConfig{ID: repoID}, resolver, &Engine{Hydrator: hydrator}) + + fs.mu.Lock() + ref := fs.allocInode("link", "symlink", 0o120000) + fs.mu.Unlock() + + op := &fuseops.ReadSymlinkOp{Inode: ref.ID} + err := fs.ReadSymlink(context.Background(), op) + if !errors.Is(err, syscall.ENAMETOOLONG) { + t.Fatalf("err = %v, want ENAMETOOLONG", err) + } + if hydrator.calls != 0 { + t.Fatalf("EnsureHydrated calls = %d, want 0", hydrator.calls) + } + if op.Target != "" { + t.Fatalf("target = %q, want empty", op.Target) + } +}