Skip to content

Commit 2789b36

Browse files
committed
feat: simplify git walking
Signed-off-by: Brian McGee <[email protected]>
1 parent 721c3cf commit 2789b36

File tree

5 files changed

+82
-249
lines changed

5 files changed

+82
-249
lines changed

walk/filetree.go

-62
This file was deleted.

walk/filetree_test.go

-31
This file was deleted.

walk/git.go

+79-153
Original file line numberDiff line numberDiff line change
@@ -1,200 +1,126 @@
11
package walk
22

33
import (
4+
"bufio"
45
"context"
56
"fmt"
7+
"github.com/charmbracelet/log"
8+
"github.com/numtide/treefmt/stats"
9+
"golang.org/x/sync/errgroup"
610
"io"
7-
"io/fs"
811
"os"
12+
"os/exec"
913
"path/filepath"
10-
"runtime"
1114
"strings"
12-
13-
"github.com/charmbracelet/log"
14-
"github.com/go-git/go-git/v5"
15-
"github.com/go-git/go-git/v5/plumbing/filemode"
16-
"github.com/numtide/treefmt/stats"
17-
"golang.org/x/sync/errgroup"
1815
)
1916

2017
type GitReader struct {
21-
root string
22-
path string
23-
stats *stats.Stats
24-
batchSize int
18+
root string
19+
path string
20+
args []string
2521

26-
log *log.Logger
27-
repo *git.Repository
22+
log *log.Logger
23+
stats *stats.Stats
2824

29-
filesCh chan *File
30-
31-
eg *errgroup.Group
25+
eg *errgroup.Group
26+
scanner *bufio.Scanner
3227
}
3328

34-
func (g *GitReader) process() error {
29+
func (g *GitReader) Read(ctx context.Context, files []*File) (n int, err error) {
30+
// ensure we record how many files we traversed
3531
defer func() {
36-
close(g.filesCh)
32+
g.stats.Add(stats.Traversed, int32(n))
3733
}()
3834

39-
gitIndex, err := g.repo.Storer.Index()
40-
if err != nil {
41-
return fmt.Errorf("failed to open git index: %w", err)
42-
}
43-
44-
// if we need to walk a path that is not the root of the repository, we will read the directory structure of the
45-
// git index into memory for faster lookups
46-
var idxCache *filetree
47-
48-
path := filepath.Clean(filepath.Join(g.root, g.path))
49-
if !strings.HasPrefix(path, g.root) {
50-
return fmt.Errorf("path '%s' is outside of the root '%s'", path, g.root)
51-
}
52-
53-
switch path {
54-
55-
case g.root:
56-
57-
// we can just iterate the index entries
58-
for _, entry := range gitIndex.Entries {
59-
60-
// we only want regular files, not directories or symlinks
61-
if entry.Mode == filemode.Dir || entry.Mode == filemode.Symlink {
62-
continue
63-
}
64-
65-
// stat the file
66-
path := filepath.Join(g.root, entry.Name)
67-
68-
info, err := os.Lstat(path)
69-
if os.IsNotExist(err) {
70-
// the underlying file might have been removed without the change being staged yet
71-
g.log.Warnf("Path %s is in the index but appears to have been removed from the filesystem", path)
72-
continue
73-
} else if err != nil {
74-
return fmt.Errorf("failed to stat %s: %w", path, err)
75-
}
76-
77-
// determine a relative path
78-
relPath, err := filepath.Rel(g.root, path)
79-
if err != nil {
80-
return fmt.Errorf("failed to determine a relative path for %s: %w", path, err)
81-
}
82-
83-
file := File{
84-
Path: path,
85-
RelPath: relPath,
86-
Info: info,
87-
}
88-
89-
g.stats.Add(stats.Traversed, 1)
90-
g.filesCh <- &file
91-
}
92-
93-
default:
94-
95-
// read the git index into memory if it hasn't already
96-
if idxCache == nil {
97-
idxCache = &filetree{name: ""}
98-
idxCache.readIndex(gitIndex)
99-
}
100-
101-
// git index entries are relative to the repository root, so we need to determine a relative path for the
102-
// one we are currently processing before checking if it exists within the git index
103-
relPath, err := filepath.Rel(g.root, path)
104-
if err != nil {
105-
return fmt.Errorf("failed to find root relative path for %v: %w", path, err)
106-
}
107-
108-
if !idxCache.hasPath(relPath) {
109-
log.Debugf("path %s not found in git index, skipping", relPath)
110-
return nil
111-
}
112-
113-
err = filepath.Walk(path, func(path string, info fs.FileInfo, _ error) error {
114-
// skip directories
115-
if info.IsDir() {
116-
return nil
117-
}
118-
119-
// determine a path relative to g.root before checking presence in the git index
120-
relPath, err := filepath.Rel(g.root, path)
121-
if err != nil {
122-
return fmt.Errorf("failed to determine a relative path for %s: %w", path, err)
123-
}
124-
125-
if !idxCache.hasPath(relPath) {
126-
log.Debugf("path %v not found in git index, skipping", relPath)
127-
return nil
128-
}
35+
if g.scanner == nil {
36+
// create a pipe to capture the command output
37+
r, w := io.Pipe()
12938

130-
file := File{
131-
Path: path,
132-
RelPath: relPath,
133-
Info: info,
134-
}
39+
// create a command which will execute from the specified sub path within root
40+
cmd := exec.Command("git", g.args...)
41+
cmd.Dir = filepath.Join(g.root, g.path)
42+
cmd.Stdout = w
13543

136-
g.stats.Add(stats.Traversed, 1)
137-
g.filesCh <- &file
138-
return nil
44+
// execute the command in the background
45+
g.eg.Go(func() error {
46+
return w.CloseWithError(cmd.Run())
13947
})
140-
if err != nil {
141-
return fmt.Errorf("failed to walk %s: %w", path, err)
142-
}
143-
}
14448

145-
return nil
146-
}
147-
148-
func (g *GitReader) Read(ctx context.Context, files []*File) (n int, err error) {
149-
idx := 0
49+
// create a new scanner for reading the output
50+
g.scanner = bufio.NewScanner(r)
51+
}
15052

15153
LOOP:
152-
for idx < len(files) {
54+
55+
for n < len(files) {
15356
select {
57+
58+
// exit early if the context was cancelled
15459
case <-ctx.Done():
155-
return 0, ctx.Err()
156-
case file, ok := <-g.filesCh:
157-
if !ok {
60+
return n, ctx.Err()
61+
62+
default:
63+
// read the next file
64+
if g.scanner.Scan() {
65+
path := filepath.Join(g.root, g.path, g.scanner.Text())
66+
67+
g.log.Debugf("processing file: %s", path)
68+
69+
info, err := os.Stat(path)
70+
if os.IsNotExist(err) {
71+
// the underlying file might have been removed
72+
g.log.Warnf(
73+
"Path %s is in the worktree but appears to have been removed from the filesystem", path,
74+
)
75+
continue
76+
} else if err != nil {
77+
return n, fmt.Errorf("failed to stat %s: %w", path, err)
78+
}
79+
80+
files[n] = &File{
81+
Path: path,
82+
RelPath: filepath.Join(g.path, g.scanner.Text()),
83+
Info: info,
84+
}
85+
n++
86+
87+
} else {
88+
// nothing more to read
15889
err = io.EOF
15990
break LOOP
16091
}
161-
files[idx] = file
162-
idx++
16392
}
16493
}
16594

166-
return idx, err
95+
return n, err
16796
}
16897

16998
func (g *GitReader) Close() error {
17099
return g.eg.Wait()
171100
}
172101

173-
func NewGitReader(
102+
func NewGitWorktreeReader(
174103
root string,
175104
path string,
176105
statz *stats.Stats,
177-
batchSize int,
178106
) (*GitReader, error) {
179-
repo, err := git.PlainOpen(root)
180-
if err != nil {
181-
return nil, fmt.Errorf("failed to open git repository: %w", err)
182-
}
183107

184-
eg := &errgroup.Group{}
185-
186-
r := &GitReader{
187-
root: root,
188-
path: path,
189-
stats: statz,
190-
batchSize: batchSize,
191-
log: log.WithPrefix("walk[git]"),
192-
repo: repo,
193-
filesCh: make(chan *File, batchSize*runtime.NumCPU()),
194-
eg: eg,
195-
}
108+
// check if the root is a git repository
109+
cmd := exec.Command("git", "rev-parse", "--is-inside-work-tree")
110+
cmd.Dir = root
196111

197-
eg.Go(r.process)
112+
if out, err := cmd.Output(); err != nil {
113+
return nil, fmt.Errorf("failed to check if git repository is inside work tree: %w", err)
114+
} else if strings.Trim(string(out), "\n") != "true" {
115+
return nil, fmt.Errorf("git repository is not inside work tree")
116+
}
198117

199-
return r, nil
118+
return &GitReader{
119+
root: root,
120+
path: path,
121+
args: []string{"ls-files"},
122+
stats: statz,
123+
eg: &errgroup.Group{},
124+
log: log.WithPrefix("walk[git]"),
125+
}, nil
200126
}

walk/git_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ import (
1818
"github.com/stretchr/testify/require"
1919
)
2020

21-
func TestGitReader(t *testing.T) {
21+
func TestGitWorktreeReader(t *testing.T) {
2222
as := require.New(t)
2323

2424
tempDir := test.TempExamples(t)
@@ -40,7 +40,7 @@ func TestGitReader(t *testing.T) {
4040

4141
statz := stats.New()
4242

43-
reader, err := walk.NewGitReader(tempDir, "", &statz, 1024)
43+
reader, err := walk.NewGitWorktreeReader(tempDir, "", &statz)
4444
as.NoError(err)
4545

4646
count := 0

0 commit comments

Comments
 (0)