Skip to content

Commit 9253e50

Browse files
committed
feat: only emit changed files with git walker
If a file's modification time has not changed when compared with the git index, we do not emit the file for processing. This allows users to introduce treefmt to a repository without suffering a large initial formatting commit. Instead, files can be formatted incrementally as they are changed. Closes #311. Signed-off-by: Brian McGee <[email protected]>
1 parent 0953dd5 commit 9253e50

File tree

9 files changed

+87
-54
lines changed

9 files changed

+87
-54
lines changed

cache/cache.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import (
1212
"git.numtide.com/numtide/treefmt/stats"
1313

1414
"git.numtide.com/numtide/treefmt/format"
15-
"git.numtide.com/numtide/treefmt/walk"
15+
"git.numtide.com/numtide/treefmt/walker"
1616

1717
"github.com/charmbracelet/log"
1818

@@ -187,7 +187,7 @@ func putEntry(bucket *bolt.Bucket, path string, entry *Entry) error {
187187

188188
// ChangeSet is used to walk a filesystem, starting at root, and outputting any new or changed paths using filesCh.
189189
// It determines if a path is new or has changed by comparing against cache entries.
190-
func ChangeSet(ctx context.Context, walker walk.Walker, filesCh chan<- *walk.File) error {
190+
func ChangeSet(ctx context.Context, wk walker.Walker, filesCh chan<- *walker.File) error {
191191
start := time.Now()
192192

193193
defer func() {
@@ -205,7 +205,7 @@ func ChangeSet(ctx context.Context, walker walk.Walker, filesCh chan<- *walk.Fil
205205
}
206206
}()
207207

208-
return walker.Walk(ctx, func(file *walk.File, err error) error {
208+
return wk.Walk(ctx, func(file *walker.File, err error) error {
209209
select {
210210
case <-ctx.Done():
211211
return ctx.Err()
@@ -264,7 +264,7 @@ func ChangeSet(ctx context.Context, walker walk.Walker, filesCh chan<- *walk.Fil
264264
}
265265

266266
// Update is used to record updated cache information for the specified list of paths.
267-
func Update(files []*walk.File) error {
267+
func Update(files []*walker.File) error {
268268
start := time.Now()
269269
defer func() {
270270
logger.Debugf("finished processing %v paths in %v", len(files), time.Since(start))

cli/cli.go

+9-8
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import (
66
"github.com/gobwas/glob"
77

88
"git.numtide.com/numtide/treefmt/format"
9-
"git.numtide.com/numtide/treefmt/walk"
9+
"git.numtide.com/numtide/treefmt/walker"
1010
"github.com/alecthomas/kong"
1111
"github.com/charmbracelet/log"
1212
)
@@ -25,10 +25,11 @@ type Format struct {
2525
Formatters []string `short:"f" help:"Specify formatters to apply. Defaults to all formatters."`
2626
TreeRoot string `type:"existingdir" xor:"tree-root" help:"The root directory from which treefmt will start walking the filesystem (defaults to the directory containing the config file)."`
2727
TreeRootFile string `type:"string" xor:"tree-root" help:"File to search for to find the project root (if --tree-root is not passed)."`
28-
Walk walk.Type `enum:"auto,git,filesystem" default:"auto" help:"The method used to traverse the files within --tree-root. Currently supports 'auto', 'git' or 'filesystem'."`
29-
Verbosity int `name:"verbose" short:"v" type:"counter" default:"0" env:"LOG_LEVEL" help:"Set the verbosity of logs e.g. -vv."`
30-
Version bool `name:"version" short:"V" help:"Print version."`
31-
Init bool `name:"init" short:"i" help:"Create a new treefmt.toml."`
28+
Walk walker.Type `enum:"auto,git,filesystem" default:"auto" help:"The method used to traverse the files within --tree-root. Currently supports 'auto', 'git' or 'filesystem'."`
29+
30+
Verbosity int `name:"verbose" short:"v" type:"counter" default:"0" env:"LOG_LEVEL" help:"Set the verbosity of logs e.g. -vv."`
31+
Version bool `name:"version" short:"V" help:"Print version."`
32+
Init bool `name:"init" short:"i" help:"Create a new treefmt.toml."`
3233

3334
OnUnmatched log.Level `name:"on-unmatched" short:"u" default:"warn" help:"Log paths that did not match any formatters at the specified log level, with fatal exiting the process with an error. Possible values are <debug|info|warn|error|fatal>."`
3435

@@ -40,9 +41,9 @@ type Format struct {
4041
formatters map[string]*format.Formatter
4142
globalExcludes []glob.Glob
4243

43-
filesCh chan *walk.File
44-
formattedCh chan *walk.File
45-
processedCh chan *walk.File
44+
fileCh chan *walker.File
45+
formattedCh chan *walker.File
46+
processedCh chan *walker.File
4647
}
4748

4849
func (f *Format) configureLogging() {

cli/format.go

+18-18
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import (
1818

1919
"git.numtide.com/numtide/treefmt/cache"
2020
"git.numtide.com/numtide/treefmt/config"
21-
"git.numtide.com/numtide/treefmt/walk"
21+
"git.numtide.com/numtide/treefmt/walker"
2222

2323
"github.com/charmbracelet/log"
2424
"golang.org/x/sync/errgroup"
@@ -147,13 +147,13 @@ func (f *Format) Run() (err error) {
147147

148148
// create a channel for files needing to be processed
149149
// we use a multiple of batch size here as a rudimentary concurrency optimization based on the host machine
150-
f.filesCh = make(chan *walk.File, BatchSize*runtime.NumCPU())
150+
f.fileCh = make(chan *walker.File, BatchSize*runtime.NumCPU())
151151

152152
// create a channel for files that have been formatted
153-
f.formattedCh = make(chan *walk.File, cap(f.filesCh))
153+
f.formattedCh = make(chan *walker.File, cap(f.fileCh))
154154

155155
// create a channel for files that have been processed
156-
f.processedCh = make(chan *walk.File, cap(f.filesCh))
156+
f.processedCh = make(chan *walker.File, cap(f.fileCh))
157157

158158
// start concurrent processing tasks in reverse order
159159
eg.Go(f.updateCache(ctx))
@@ -168,14 +168,14 @@ func (f *Format) Run() (err error) {
168168
func (f *Format) walkFilesystem(ctx context.Context) func() error {
169169
return func() error {
170170
eg, ctx := errgroup.WithContext(ctx)
171-
pathsCh := make(chan string, BatchSize)
171+
pathCh := make(chan string, BatchSize)
172172

173173
// By default, we use the cli arg, but if the stdin flag has been set we force a filesystem walk
174174
// since we will only be processing one file from a temp directory
175175
walkerType := f.Walk
176176

177177
if f.Stdin {
178-
walkerType = walk.Filesystem
178+
walkerType = walker.Filesystem
179179

180180
// check we have only received one path arg which we use for the file extension / matching to formatters
181181
if len(f.Paths) != 1 {
@@ -197,15 +197,15 @@ func (f *Format) walkFilesystem(ctx context.Context) func() error {
197197
}
198198

199199
walkPaths := func() error {
200-
defer close(pathsCh)
200+
defer close(pathCh)
201201

202202
var idx int
203203
for idx < len(f.Paths) {
204204
select {
205205
case <-ctx.Done():
206206
return ctx.Err()
207207
default:
208-
pathsCh <- f.Paths[idx]
208+
pathCh <- f.Paths[idx]
209209
idx += 1
210210
}
211211
}
@@ -217,37 +217,37 @@ func (f *Format) walkFilesystem(ctx context.Context) func() error {
217217
eg.Go(walkPaths)
218218
} else {
219219
// no explicit paths to process, so we only need to process root
220-
pathsCh <- f.TreeRoot
221-
close(pathsCh)
220+
pathCh <- f.TreeRoot
221+
close(pathCh)
222222
}
223223

224224
// create a filesystem walker
225-
walker, err := walk.New(walkerType, f.TreeRoot, pathsCh)
225+
wk, err := walker.New(walkerType, f.TreeRoot, f.NoCache, pathCh)
226226
if err != nil {
227227
return fmt.Errorf("failed to create walker: %w", err)
228228
}
229229

230-
// close the files channel when we're done walking the file system
231-
defer close(f.filesCh)
230+
// close the file channel when we're done walking the file system
231+
defer close(f.fileCh)
232232

233233
// if no cache has been configured, or we are processing from stdin, we invoke the walker directly
234234
if f.NoCache || f.Stdin {
235-
return walker.Walk(ctx, func(file *walk.File, err error) error {
235+
return wk.Walk(ctx, func(file *walker.File, err error) error {
236236
select {
237237
case <-ctx.Done():
238238
return ctx.Err()
239239
default:
240240
stats.Add(stats.Traversed, 1)
241241
stats.Add(stats.Emitted, 1)
242-
f.filesCh <- file
242+
f.fileCh <- file
243243
return nil
244244
}
245245
})
246246
}
247247

248248
// otherwise we pass the walker to the cache and have it generate files for processing based on whether or not
249249
// they have been added/changed since the last invocation
250-
if err = cache.ChangeSet(ctx, walker, f.filesCh); err != nil {
250+
if err = cache.ChangeSet(ctx, wk, f.fileCh); err != nil {
251251
return fmt.Errorf("failed to generate change set: %w", err)
252252
}
253253
return nil
@@ -319,7 +319,7 @@ func (f *Format) applyFormatters(ctx context.Context) func() error {
319319
}()
320320

321321
// iterate the files channel
322-
for file := range f.filesCh {
322+
for file := range f.fileCh {
323323

324324
// first check if this file has been globally excluded
325325
if format.PathMatches(file.RelPath, f.globalExcludes) {
@@ -419,7 +419,7 @@ func (f *Format) detectFormatted(ctx context.Context) func() error {
419419
func (f *Format) updateCache(ctx context.Context) func() error {
420420
return func() error {
421421
// used to batch updates for more efficient txs
422-
batch := make([]*walk.File, 0, BatchSize)
422+
batch := make([]*walker.File, 0, BatchSize)
423423

424424
// apply a batch
425425
processBatch := func() error {

format/formatter.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import (
88
"os/exec"
99
"time"
1010

11-
"git.numtide.com/numtide/treefmt/walk"
11+
"git.numtide.com/numtide/treefmt/walker"
1212

1313
"git.numtide.com/numtide/treefmt/config"
1414

@@ -89,7 +89,7 @@ func (f *Formatter) Apply(ctx context.Context, tasks []*Task) error {
8989

9090
// Wants is used to test if a Formatter wants a path based on its configured Includes and Excludes patterns.
9191
// Returns true if the Formatter should be applied to path, false otherwise.
92-
func (f *Formatter) Wants(file *walk.File) bool {
92+
func (f *Formatter) Wants(file *walker.File) bool {
9393
match := !PathMatches(file.RelPath, f.excludes) && PathMatches(file.RelPath, f.includes)
9494
if match {
9595
f.log.Debugf("match: %v", file)

format/task.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,16 @@ import (
44
"cmp"
55
"slices"
66

7-
"git.numtide.com/numtide/treefmt/walk"
7+
"git.numtide.com/numtide/treefmt/walker"
88
)
99

1010
type Task struct {
11-
File *walk.File
11+
File *walker.File
1212
Formatters []*Formatter
1313
BatchKey string
1414
}
1515

16-
func NewTask(file *walk.File, formatters []*Formatter) Task {
16+
func NewTask(file *walker.File, formatters []*Formatter) Task {
1717
// sort by priority in ascending order
1818
slices.SortFunc(formatters, func(a, b *Formatter) int {
1919
priorityA := a.Priority()

walk/filesystem.go walker/filesystem.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package walk
1+
package walker
22

33
import (
44
"context"

walk/filesystem_test.go walker/filesystem_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package walk
1+
package walker
22

33
import (
44
"context"

walk/git.go walker/git.go

+43-11
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,26 @@
1-
package walk
1+
package walker
22

33
import (
44
"context"
55
"fmt"
66
"io/fs"
77
"os"
88
"path/filepath"
9+
"time"
10+
11+
"github.com/go-git/go-git/v5/plumbing/format/index"
912

1013
"github.com/charmbracelet/log"
1114

1215
"github.com/go-git/go-git/v5"
1316
)
1417

1518
type gitWalker struct {
16-
root string
17-
paths chan string
18-
repo *git.Repository
19+
root string
20+
paths chan string
21+
repo *git.Repository
22+
23+
noCache bool
1924
relPathOffset int
2025
}
2126

@@ -39,7 +44,20 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
3944
}
4045

4146
// cache in-memory whether a path is present in the git index
42-
var cache map[string]bool
47+
var cache map[string]*index.Entry
48+
49+
// by default, we only emit files if they have changes when compared with the git index
50+
emitFile := func(entry *index.Entry, info os.FileInfo) bool {
51+
// mod time comparison is done with EPOCH (second) precision as per the POSIX spec
52+
return entry.ModifiedAt.Truncate(time.Second) != info.ModTime().Truncate(time.Second)
53+
}
54+
55+
if g.noCache {
56+
// emit all files in the index
57+
emitFile = func(entry *index.Entry, info os.FileInfo) bool {
58+
return true
59+
}
60+
}
4361

4462
for path := range g.paths {
4563

@@ -63,6 +81,11 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
6381
return fmt.Errorf("failed to stat %s: %w", path, err)
6482
}
6583

84+
// skip processing if the file hasn't changed
85+
if !emitFile(entry, info) {
86+
continue
87+
}
88+
6689
// determine a relative path
6790
relPath, err := g.relPath(path)
6891
if err != nil {
@@ -83,11 +106,11 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
83106
continue
84107
}
85108

86-
// otherwise we ensure the git index entries are cached and then check if they are in the git index
109+
// otherwise we ensure the git index entries are cached and then check if the path is in the git index
87110
if cache == nil {
88-
cache = make(map[string]bool)
111+
cache = make(map[string]*index.Entry)
89112
for _, entry := range idx.Entries {
90-
cache[entry.Name] = true
113+
cache[entry.Name] = entry
91114
}
92115
}
93116

@@ -103,7 +126,8 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
103126
}
104127

105128
return filepath.Walk(path, func(path string, info fs.FileInfo, _ error) error {
106-
if info.IsDir() {
129+
// ignore directories and symlinks
130+
if info.IsDir() || info.Mode()&os.ModeSymlink == os.ModeSymlink {
107131
return nil
108132
}
109133

@@ -112,9 +136,12 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
112136
return fmt.Errorf("failed to determine a relative path for %s: %w", path, err)
113137
}
114138

115-
if _, ok := cache[relPath]; !ok {
139+
if entry, ok := cache[relPath]; !ok {
116140
log.Debugf("path %v not found in git index, skipping", path)
117141
return nil
142+
} else if !emitFile(entry, info) {
143+
log.Debugf("path %v has not changed, skipping", path)
144+
return nil
118145
}
119146

120147
file := File{
@@ -130,7 +157,11 @@ func (g gitWalker) Walk(ctx context.Context, fn WalkFunc) error {
130157
return nil
131158
}
132159

133-
func NewGit(root string, paths chan string) (Walker, error) {
160+
func NewGit(
161+
root string,
162+
noCache bool,
163+
paths chan string,
164+
) (Walker, error) {
134165
repo, err := git.PlainOpen(root)
135166
if err != nil {
136167
return nil, fmt.Errorf("failed to open git repo: %w", err)
@@ -139,6 +170,7 @@ func NewGit(root string, paths chan string) (Walker, error) {
139170
root: root,
140171
paths: paths,
141172
repo: repo,
173+
noCache: noCache,
142174
relPathOffset: len(root) + 1,
143175
}, nil
144176
}

0 commit comments

Comments
 (0)