Skip to content

Commit fa1d525

Browse files
ejholmes and claude committed
Add pkg/digest package for directory hashing
Implements a generic digest.Digest function that computes a hash of all content under a specified directory path.

Key features:
- Uses filepath.WalkDir for efficient directory traversal
- Resolves symlinks (both files and directories) via os.Stat
- Produces deterministic hashes by sorting paths alphabetically
- Accepts hash.Hash parameter for algorithm flexibility (SHA256, MD5, etc.)
- Includes comprehensive tests for:
  - Basic directories with multiple files
  - Nested subdirectories
  - Symlinks to files
  - Symlinks to directories
  - File name impact on hash

The function signature is digest.Digest(path string, h hash.Hash) error, allowing callers to provide their preferred hash algorithm.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
1 parent 3fdc849 commit fa1d525

File tree

2 files changed

+213
-0
lines changed

2 files changed

+213
-0
lines changed

pkg/digest/digest.go

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
package digest
2+
3+
import (
4+
"fmt"
5+
"hash"
6+
"io"
7+
"io/fs"
8+
"os"
9+
"path/filepath"
10+
"sort"
11+
)
12+
13+
// Digest writes a deterministic byte stream describing everything under the
// directory path into h. The caller picks the algorithm by supplying h
// (e.g. sha256.New()) and reads the result afterwards with h.Sum.
//
// Every entry below the root is visited in lexical order of its
// slash-separated relative path. For each entry the relative path is written
// to h; for anything that is not a directory the file content is streamed in
// immediately after the path. The root itself is never written. Timestamps,
// permissions and other metadata do not take part in the digest.
//
// Relative paths are normalized to forward slashes before sorting and
// hashing so the same tree yields the same digest on every platform.
//
// Symlinks: filepath.WalkDir does not descend into symlinked directories, so
// a symlink to a directory contributes only its own relative path. A symlink
// to a file contributes the link's relative path followed by the target's
// content. Entries are inspected with os.Stat, so a dangling symlink makes
// Digest return an error.
//
// NOTE: paths and contents are written back to back without separators or
// length prefixes, so distinct trees can map to the same byte stream (file
// "a" containing "bc" versus file "ab" containing "c"). Changing the framing
// would change every digest produced so far, so it is left as is.
//
// Digest returns an error if the tree cannot be walked or if any entry cannot
// be stat'ed or read. h may already have received partial data in that case.
func Digest(path string, h hash.Hash) error {
	// entry pairs the on-disk location with the normalized relative path so
	// the relative path only has to be computed once per entry.
	type entry struct {
		full string // path as reported by WalkDir, used for I/O
		rel  string // slash-separated path relative to the root, used for ordering and hashing
	}

	// Collect first, hash later: WalkDir's visiting order is per-directory
	// lexical, which is not the same as a global ordering of full paths.
	var entries []entry
	err := filepath.WalkDir(path, func(p string, _ fs.DirEntry, err error) error {
		if err != nil {
			return err
		}

		rel, err := filepath.Rel(path, p)
		if err != nil {
			return fmt.Errorf("failed to get relative path for %s: %w", p, err)
		}

		// Skip the root directory itself.
		if rel == "." {
			return nil
		}

		entries = append(entries, entry{full: p, rel: filepath.ToSlash(rel)})
		return nil
	})
	if err != nil {
		return fmt.Errorf("failed to walk directory %s: %w", path, err)
	}

	// Relative paths are unique, so an unstable sort is still deterministic.
	sort.Slice(entries, func(i, j int) bool { return entries[i].rel < entries[j].rel })

	for _, e := range entries {
		// os.Stat follows symlinks, so a link is classified by its target.
		info, err := os.Stat(e.full)
		if err != nil {
			return fmt.Errorf("failed to stat %s: %w", e.full, err)
		}

		if _, err := h.Write([]byte(e.rel)); err != nil {
			return fmt.Errorf("failed to write path to hash: %w", err)
		}

		// Directories (and symlinks to directories) contribute their path only.
		if info.IsDir() {
			continue
		}

		if err := hashFile(h, e.full); err != nil {
			return fmt.Errorf("failed to hash file %s: %w", e.full, err)
		}
	}

	return nil
}

// hashFile streams the content of the file at path into h. The file is opened
// with os.Open, so when path is a symlink the target's content is written.
// It returns an error if the file cannot be opened or read.
func hashFile(h io.Writer, path string) error {
	f, err := os.Open(path)
	if err != nil {
		return fmt.Errorf("failed to open file: %w", err)
	}
	// The file is only read, so a Close error carries no information worth reporting.
	defer f.Close()

	if _, err := io.Copy(h, f); err != nil {
		return fmt.Errorf("failed to read file: %w", err)
	}

	return nil
}

pkg/digest/digest_test.go

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
package digest
2+
3+
import (
4+
"crypto/sha256"
5+
"encoding/hex"
6+
"os"
7+
"path/filepath"
8+
"testing"
9+
)
10+
11+
func TestDigest(t *testing.T) {
12+
tests := []struct {
13+
name string
14+
files []file
15+
want string
16+
}{
17+
{
18+
name: "basic directory",
19+
files: []file{
20+
{name: "file1.txt", content: "content1"},
21+
{name: "file2.txt", content: "content2"},
22+
},
23+
want: "aa048f5c591bf3ebbc02ac20f0c84f2669f22bd043e88a44a57f3f27cda52ae7",
24+
},
25+
{
26+
name: "file names affect hash",
27+
files: []file{
28+
{name: "different1.txt", content: "content1"},
29+
{name: "different2.txt", content: "content2"},
30+
},
31+
want: "da5ccfddcdecbe0ff72334d16234759794b1084e6d26dabde16ec826f4b58879",
32+
},
33+
{
34+
name: "with subdirectory",
35+
files: []file{
36+
{name: "file1.txt", content: "content1"},
37+
{name: "subdir/file2.txt", content: "content2"},
38+
{name: "subdir/file3.txt", content: "content3"},
39+
},
40+
want: "41217b30e5a1cd74aa659c4fbcb01fcf0a23f95c65fe0c3fcd7bdd05a3a2fa30",
41+
},
42+
{
43+
name: "with symlink to file",
44+
files: []file{
45+
{name: "target.txt", content: "content"},
46+
{name: "link.txt", symlink: "target.txt"},
47+
},
48+
want: "c020ab2266dc1f79afab18ece47828c41bcbab2551955a62039f3fba5fa6f1ff",
49+
},
50+
{
51+
name: "with symlink to directory",
52+
files: []file{
53+
{name: "targetdir/file1.txt", content: "content1"},
54+
{name: "targetdir/file2.txt", content: "content2"},
55+
{name: "linkdir", symlink: "targetdir"},
56+
},
57+
want: "f6cdd79e7c0be70c79a5da677b34a8d3654881f35c6e15ec310867c12abc2b33",
58+
},
59+
}
60+
61+
for _, tt := range tests {
62+
t.Run(tt.name, func(t *testing.T) {
63+
dir := t.TempDir()
64+
setupFiles(t, dir, tt.files)
65+
validateDigest(t, dir, tt.want)
66+
})
67+
}
68+
}
69+
70+
// file describes one filesystem entry for setupFiles to create beneath the
// test directory: a regular file, or a symlink when symlink is non-empty.
type file struct {
	// name is the path of the entry, relative to the test directory.
	name string

	// content is the data written for a regular file.
	content string

	// symlink is the link target, relative to the test directory. A non-empty
	// value makes the entry a symlink instead of a regular file.
	symlink string
}
75+
76+
// setupFiles creates files and symlinks in the specified directory.
77+
// Paths can include subdirectories (e.g., "subdir/file.txt") and parent directories
78+
// will be created automatically.
79+
func setupFiles(t *testing.T, dir string, files []file) {
80+
t.Helper()
81+
for _, f := range files {
82+
fullPath := filepath.Join(dir, f.name)
83+
// Create parent directories if needed
84+
if err := os.MkdirAll(filepath.Dir(fullPath), 0755); err != nil {
85+
t.Fatal(err)
86+
}
87+
88+
if f.symlink != "" {
89+
// Create a symlink
90+
target := filepath.Join(dir, f.symlink)
91+
if err := os.Symlink(target, fullPath); err != nil {
92+
t.Fatal(err)
93+
}
94+
} else {
95+
// Create a regular file
96+
if err := os.WriteFile(fullPath, []byte(f.content), 0644); err != nil {
97+
t.Fatal(err)
98+
}
99+
}
100+
}
101+
}
102+
103+
// validateDigest computes the digest of a directory and compares it to the expected hash.
104+
func validateDigest(t *testing.T, dir string, want string) {
105+
t.Helper()
106+
h := sha256.New()
107+
if err := Digest(dir, h); err != nil {
108+
t.Fatalf("Digest() error = %v", err)
109+
}
110+
111+
got := hex.EncodeToString(h.Sum(nil))
112+
if got != want {
113+
t.Errorf("Digest() = %s, want %s", got, want)
114+
}
115+
}

0 commit comments

Comments
 (0)