|
1 | 1 | package walk
|
2 | 2 |
|
3 | 3 | import (
|
| 4 | + "bufio" |
4 | 5 | "context"
|
5 | 6 | "fmt"
|
| 7 | + "github.com/charmbracelet/log" |
| 8 | + "github.com/numtide/treefmt/stats" |
| 9 | + "golang.org/x/sync/errgroup" |
6 | 10 | "io"
|
7 |
| - "io/fs" |
8 | 11 | "os"
|
| 12 | + "os/exec" |
9 | 13 | "path/filepath"
|
10 |
| - "runtime" |
11 | 14 | "strings"
|
12 |
| - |
13 |
| - "github.com/charmbracelet/log" |
14 |
| - "github.com/go-git/go-git/v5" |
15 |
| - "github.com/go-git/go-git/v5/plumbing/filemode" |
16 |
| - "github.com/numtide/treefmt/stats" |
17 |
| - "golang.org/x/sync/errgroup" |
18 | 15 | )
|
19 | 16 |
|
20 | 17 | type GitReader struct {
|
21 |
| - root string |
22 |
| - path string |
23 |
| - stats *stats.Stats |
24 |
| - batchSize int |
| 18 | + root string |
| 19 | + path string |
| 20 | + args []string |
25 | 21 |
|
26 |
| - log *log.Logger |
27 |
| - repo *git.Repository |
| 22 | + log *log.Logger |
| 23 | + stats *stats.Stats |
28 | 24 |
|
29 |
| - filesCh chan *File |
30 |
| - |
31 |
| - eg *errgroup.Group |
| 25 | + eg *errgroup.Group |
| 26 | + scanner *bufio.Scanner |
32 | 27 | }
|
33 | 28 |
|
34 |
| -func (g *GitReader) process() error { |
| 29 | +func (g *GitReader) Read(ctx context.Context, files []*File) (n int, err error) { |
| 30 | + // ensure we record how many files we traversed |
35 | 31 | defer func() {
|
36 |
| - close(g.filesCh) |
| 32 | + g.stats.Add(stats.Traversed, int32(n)) |
37 | 33 | }()
|
38 | 34 |
|
39 |
| - gitIndex, err := g.repo.Storer.Index() |
40 |
| - if err != nil { |
41 |
| - return fmt.Errorf("failed to open git index: %w", err) |
42 |
| - } |
43 |
| - |
44 |
| - // if we need to walk a path that is not the root of the repository, we will read the directory structure of the |
45 |
| - // git index into memory for faster lookups |
46 |
| - var idxCache *filetree |
47 |
| - |
48 |
| - path := filepath.Clean(filepath.Join(g.root, g.path)) |
49 |
| - if !strings.HasPrefix(path, g.root) { |
50 |
| - return fmt.Errorf("path '%s' is outside of the root '%s'", path, g.root) |
51 |
| - } |
52 |
| - |
53 |
| - switch path { |
54 |
| - |
55 |
| - case g.root: |
56 |
| - |
57 |
| - // we can just iterate the index entries |
58 |
| - for _, entry := range gitIndex.Entries { |
59 |
| - |
60 |
| - // we only want regular files, not directories or symlinks |
61 |
| - if entry.Mode == filemode.Dir || entry.Mode == filemode.Symlink { |
62 |
| - continue |
63 |
| - } |
64 |
| - |
65 |
| - // stat the file |
66 |
| - path := filepath.Join(g.root, entry.Name) |
67 |
| - |
68 |
| - info, err := os.Lstat(path) |
69 |
| - if os.IsNotExist(err) { |
70 |
| - // the underlying file might have been removed without the change being staged yet |
71 |
| - g.log.Warnf("Path %s is in the index but appears to have been removed from the filesystem", path) |
72 |
| - continue |
73 |
| - } else if err != nil { |
74 |
| - return fmt.Errorf("failed to stat %s: %w", path, err) |
75 |
| - } |
76 |
| - |
77 |
| - // determine a relative path |
78 |
| - relPath, err := filepath.Rel(g.root, path) |
79 |
| - if err != nil { |
80 |
| - return fmt.Errorf("failed to determine a relative path for %s: %w", path, err) |
81 |
| - } |
82 |
| - |
83 |
| - file := File{ |
84 |
| - Path: path, |
85 |
| - RelPath: relPath, |
86 |
| - Info: info, |
87 |
| - } |
88 |
| - |
89 |
| - g.stats.Add(stats.Traversed, 1) |
90 |
| - g.filesCh <- &file |
91 |
| - } |
92 |
| - |
93 |
| - default: |
94 |
| - |
95 |
| - // read the git index into memory if it hasn't already |
96 |
| - if idxCache == nil { |
97 |
| - idxCache = &filetree{name: ""} |
98 |
| - idxCache.readIndex(gitIndex) |
99 |
| - } |
100 |
| - |
101 |
| - // git index entries are relative to the repository root, so we need to determine a relative path for the |
102 |
| - // one we are currently processing before checking if it exists within the git index |
103 |
| - relPath, err := filepath.Rel(g.root, path) |
104 |
| - if err != nil { |
105 |
| - return fmt.Errorf("failed to find root relative path for %v: %w", path, err) |
106 |
| - } |
107 |
| - |
108 |
| - if !idxCache.hasPath(relPath) { |
109 |
| - log.Debugf("path %s not found in git index, skipping", relPath) |
110 |
| - return nil |
111 |
| - } |
112 |
| - |
113 |
| - err = filepath.Walk(path, func(path string, info fs.FileInfo, _ error) error { |
114 |
| - // skip directories |
115 |
| - if info.IsDir() { |
116 |
| - return nil |
117 |
| - } |
118 |
| - |
119 |
| - // determine a path relative to g.root before checking presence in the git index |
120 |
| - relPath, err := filepath.Rel(g.root, path) |
121 |
| - if err != nil { |
122 |
| - return fmt.Errorf("failed to determine a relative path for %s: %w", path, err) |
123 |
| - } |
124 |
| - |
125 |
| - if !idxCache.hasPath(relPath) { |
126 |
| - log.Debugf("path %v not found in git index, skipping", relPath) |
127 |
| - return nil |
128 |
| - } |
| 35 | + if g.scanner == nil { |
| 36 | + // create a pipe to capture the command output |
| 37 | + r, w := io.Pipe() |
129 | 38 |
|
130 |
| - file := File{ |
131 |
| - Path: path, |
132 |
| - RelPath: relPath, |
133 |
| - Info: info, |
134 |
| - } |
| 39 | + // create a command which will execute from the specified sub path within root |
| 40 | + cmd := exec.Command("git", g.args...) |
| 41 | + cmd.Dir = filepath.Join(g.root, g.path) |
| 42 | + cmd.Stdout = w |
135 | 43 |
|
136 |
| - g.stats.Add(stats.Traversed, 1) |
137 |
| - g.filesCh <- &file |
138 |
| - return nil |
| 44 | + // execute the command in the background |
| 45 | + g.eg.Go(func() error { |
| 46 | + return w.CloseWithError(cmd.Run()) |
139 | 47 | })
|
140 |
| - if err != nil { |
141 |
| - return fmt.Errorf("failed to walk %s: %w", path, err) |
142 |
| - } |
143 |
| - } |
144 | 48 |
|
145 |
| - return nil |
146 |
| -} |
147 |
| - |
148 |
| -func (g *GitReader) Read(ctx context.Context, files []*File) (n int, err error) { |
149 |
| - idx := 0 |
| 49 | + // create a new scanner for reading the output |
| 50 | + g.scanner = bufio.NewScanner(r) |
| 51 | + } |
150 | 52 |
|
151 | 53 | LOOP:
|
152 |
| - for idx < len(files) { |
| 54 | + |
| 55 | + for n < len(files) { |
153 | 56 | select {
|
| 57 | + |
| 58 | + // exit early if the context was cancelled |
154 | 59 | case <-ctx.Done():
|
155 |
| - return 0, ctx.Err() |
156 |
| - case file, ok := <-g.filesCh: |
157 |
| - if !ok { |
| 60 | + return n, ctx.Err() |
| 61 | + |
| 62 | + default: |
| 63 | + // read the next file |
| 64 | + if g.scanner.Scan() { |
| 65 | + path := filepath.Join(g.root, g.path, g.scanner.Text()) |
| 66 | + |
| 67 | + g.log.Debugf("processing file: %s", path) |
| 68 | + |
| 69 | + info, err := os.Stat(path) |
| 70 | + if os.IsNotExist(err) { |
| 71 | + // the underlying file might have been removed |
| 72 | + g.log.Warnf( |
| 73 | + "Path %s is in the worktree but appears to have been removed from the filesystem", path, |
| 74 | + ) |
| 75 | + continue |
| 76 | + } else if err != nil { |
| 77 | + return n, fmt.Errorf("failed to stat %s: %w", path, err) |
| 78 | + } |
| 79 | + |
| 80 | + files[n] = &File{ |
| 81 | + Path: path, |
| 82 | + RelPath: filepath.Join(g.path, g.scanner.Text()), |
| 83 | + Info: info, |
| 84 | + } |
| 85 | + n++ |
| 86 | + |
| 87 | + } else { |
| 88 | + // nothing more to read |
158 | 89 | err = io.EOF
|
159 | 90 | break LOOP
|
160 | 91 | }
|
161 |
| - files[idx] = file |
162 |
| - idx++ |
163 | 92 | }
|
164 | 93 | }
|
165 | 94 |
|
166 |
| - return idx, err |
| 95 | + return n, err |
167 | 96 | }
|
168 | 97 |
|
169 | 98 | func (g *GitReader) Close() error {
|
170 | 99 | return g.eg.Wait()
|
171 | 100 | }
|
172 | 101 |
|
173 |
| -func NewGitReader( |
| 102 | +func NewGitWorktreeReader( |
174 | 103 | root string,
|
175 | 104 | path string,
|
176 | 105 | statz *stats.Stats,
|
177 |
| - batchSize int, |
178 | 106 | ) (*GitReader, error) {
|
179 |
| - repo, err := git.PlainOpen(root) |
180 |
| - if err != nil { |
181 |
| - return nil, fmt.Errorf("failed to open git repository: %w", err) |
182 |
| - } |
183 | 107 |
|
184 |
| - eg := &errgroup.Group{} |
185 |
| - |
186 |
| - r := &GitReader{ |
187 |
| - root: root, |
188 |
| - path: path, |
189 |
| - stats: statz, |
190 |
| - batchSize: batchSize, |
191 |
| - log: log.WithPrefix("walk[git]"), |
192 |
| - repo: repo, |
193 |
| - filesCh: make(chan *File, batchSize*runtime.NumCPU()), |
194 |
| - eg: eg, |
195 |
| - } |
| 108 | + // check if the root is a git repository |
| 109 | + cmd := exec.Command("git", "rev-parse", "--is-inside-work-tree") |
| 110 | + cmd.Dir = root |
196 | 111 |
|
197 |
| - eg.Go(r.process) |
| 112 | + if out, err := cmd.Output(); err != nil { |
| 113 | + return nil, fmt.Errorf("failed to check if git repository is inside work tree: %w", err) |
| 114 | + } else if strings.Trim(string(out), "\n") != "true" { |
| 115 | + return nil, fmt.Errorf("git repository is not inside work tree") |
| 116 | + } |
198 | 117 |
|
199 |
| - return r, nil |
| 118 | + return &GitReader{ |
| 119 | + root: root, |
| 120 | + path: path, |
| 121 | + args: []string{"ls-files"}, |
| 122 | + stats: statz, |
| 123 | + eg: &errgroup.Group{}, |
| 124 | + log: log.WithPrefix("walk[git]"), |
| 125 | + }, nil |
200 | 126 | }
|
0 commit comments