From 5f4c98b577bf26a3f4a9a7e9b795bf3c6a19b786 Mon Sep 17 00:00:00 2001 From: Brian McGee Date: Thu, 17 Oct 2024 11:54:09 +0100 Subject: [PATCH 1/3] feat: improve formatter/config change detection Busts the path cache if any of the following occurs: * global excludes changes * formatter added / removed * formatter command changes, including checking the modtime and size of the executable it points to * formatter includes/excludes changes * formatter priority changes Change detection is based on a sha256 hash of the above, which is stored in the db and compared before formatting. Closes #455 Signed-off-by: Brian McGee --- cmd/root_test.go | 103 ++++++++++++- format/{format.go => composite.go} | 186 +++++++++++------------ format/formatter.go | 24 +++ format/formatter_test.go | 229 +++++++++++++++++++++++++++++ test/{temp.go => test.go} | 19 ++- walk/cache/bucket.go | 4 - walk/cache/cache.go | 6 +- 7 files changed, 458 insertions(+), 113 deletions(-) rename format/{format.go => composite.go} (77%) rename test/{temp.go => test.go} (73%) diff --git a/cmd/root_test.go b/cmd/root_test.go index 1425dd32..f7de2f9f 100644 --- a/cmd/root_test.go +++ b/cmd/root_test.go @@ -56,7 +56,7 @@ func TestOnUnmatched(t *testing.T) { checkOutput := func(level string, output []byte) { for _, p := range paths { - as.Contains(string(output), fmt.Sprintf("%s format: no formatter for path: %s", level, p)) + as.Contains(string(output), fmt.Sprintf("%s composite-formatter: no formatter for path: %s", level, p)) } } @@ -605,7 +605,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) { configPath := tempDir + "/touch.toml" // symlink some formatters into temp dir, so we can mess with their mod times - binPath := tempDir + "/bin" + binPath := filepath.Join(tempDir, "bin") as.NoError(os.Mkdir(binPath, 0o755)) binaries := []string{"black", "elm-format", "gofmt"} @@ -613,7 +613,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) { for _, name := range binaries { src, err := exec.LookPath(name) as.NoError(err) - as.NoError(os.Symlink(src, binPath+"/"+name)) + as.NoError(os.Symlink(src, filepath.Join(binPath, name))) } // prepend our test bin directory to PATH @@ -647,7 +647,8 @@ func TestBustCacheOnFormatterChange(t *testing.T) { }) // tweak mod time of elm formatter - as.NoError(test.RecreateSymlink(t, binPath+"/"+"elm-format")) + newTime := time.Now().Add(-time.Minute) + as.NoError(test.Lutimes(t, filepath.Join(binPath, "elm-format"), newTime, newTime)) _, statz, err = treefmt(t, args...) as.NoError(err) @@ -671,7 +672,7 @@ func TestBustCacheOnFormatterChange(t *testing.T) { }) // tweak mod time of python formatter - as.NoError(test.RecreateSymlink(t, binPath+"/"+"black")) + as.NoError(test.Lutimes(t, filepath.Join(binPath, "black"), newTime, newTime)) _, statz, err = treefmt(t, args...) as.NoError(err) @@ -695,11 +696,12 @@ func TestBustCacheOnFormatterChange(t *testing.T) { }) // add go formatter - cfg.FormatterConfigs["go"] = &config.Formatter{ + goFormatter := &config.Formatter{ Command: "gofmt", Options: []string{"-w"}, Includes: []string{"*.go"}, } + cfg.FormatterConfigs["go"] = goFormatter test.WriteConfig(t, configPath, cfg) _, statz, err = treefmt(t, args...) @@ -723,6 +725,66 @@ func TestBustCacheOnFormatterChange(t *testing.T) { stats.Changed: 0, }) + // tweak go formatter options + goFormatter.Options = []string{"-w", "-s"} + + test.WriteConfig(t, configPath, cfg) + + _, statz, err = treefmt(t, args...) + as.NoError(err) + + assertStats(t, as, statz, map[stats.Type]int{ + stats.Traversed: 32, + stats.Matched: 4, + stats.Formatted: 4, + stats.Changed: 0, + }) + + // tweak go formatter includes + goFormatter.Includes = []string{"foo/*"} + + test.WriteConfig(t, configPath, cfg) + + _, statz, err = treefmt(t, args...) + as.NoError(err) + + assertStats(t, as, statz, map[stats.Type]int{ + stats.Traversed: 32, + stats.Matched: 3, + stats.Formatted: 3, + stats.Changed: 0, + }) + + // tweak go formatter excludes + goFormatter.Includes = []string{"*.go"} + goFormatter.Excludes = []string{"foo/*"} + + test.WriteConfig(t, configPath, cfg) + + _, statz, err = treefmt(t, args...) + as.NoError(err) + + assertStats(t, as, statz, map[stats.Type]int{ + stats.Traversed: 32, + stats.Matched: 4, + stats.Formatted: 4, + stats.Changed: 0, + }) + + // add a priority + cfg.FormatterConfigs["go"].Priority = 3 + test.WriteConfig(t, configPath, cfg) + + _, statz, err = treefmt(t, args...) + as.NoError(err) + + assertStats(t, as, statz, map[stats.Type]int{ + stats.Traversed: 32, + stats.Matched: 4, + stats.Formatted: 4, + stats.Changed: 0, + }) + // remove python formatter delete(cfg.FormatterConfigs, "python") test.WriteConfig(t, configPath, cfg) @@ -772,6 +834,35 @@ func TestBustCacheOnFormatterChange(t *testing.T) { stats.Formatted: 0, stats.Changed: 0, }) + + // change global excludes + cfg.Excludes = []string{"touch.toml"} + test.WriteConfig(t, configPath, cfg) + + _, statz, err = treefmt(t, args...) + as.NoError(err) + + assertStats(t, as, statz, map[stats.Type]int{ + stats.Traversed: 32, + stats.Matched: 1, + stats.Formatted: 1, + stats.Changed: 0, + }) + + cfg.Excludes = nil + cfg.Global.Excludes = []string{"touch.toml", "yaml/test.yaml"} + + test.WriteConfig(t, configPath, cfg) + + _, statz, err = treefmt(t, args...) + as.NoError(err) + + assertStats(t, as, statz, map[stats.Type]int{ + stats.Traversed: 32, + stats.Matched: 1, + stats.Formatted: 1, + stats.Changed: 0, + }) } func TestGit(t *testing.T) { diff --git a/format/format.go b/format/composite.go similarity index 77% rename from format/format.go rename to format/composite.go index 86528122..a1313830 100644 --- a/format/format.go +++ b/format/composite.go @@ -3,6 +3,8 @@ package format import ( "cmp" "context" + "crypto/sha256" + "encoding/hex" "errors" "fmt" "os" @@ -85,10 +87,12 @@ func (b batchMap) Append(file *walk.File, matches []*Formatter) (key batchKey, b // CompositeFormatter handles the application of multiple Formatter instances based on global excludes and individual // formatter configuration. type CompositeFormatter struct { + cfg *config.Config stats *stats.Stats batchSize int globalExcludes []glob.Glob + log *log.Logger changeLevel log.Level unmatchedLevel log.Level @@ -141,7 +145,7 @@ func (c *CompositeFormatter) apply(ctx context.Context, key batchKey, batch []*w // record that a change in the underlying file occurred c.stats.Add(stats.Changed, 1) - log.Log( + c.log.Log( c.changeLevel, "file has changed", "path", file.RelPath, "prev_size", file.Info.Size(), @@ -168,7 +172,7 @@ func (c *CompositeFormatter) apply(ctx context.Context, key batchKey, batch []*w func (c *CompositeFormatter) match(file *walk.File) []*Formatter { // first check if this file has been globally excluded if pathMatches(file.RelPath, c.globalExcludes) { - log.Debugf("path matched global excludes: %s", file.RelPath) + c.log.Debugf("path matched global excludes: %s", file.RelPath) return nil } @@ -200,7 +204,7 @@ func (c *CompositeFormatter) Apply(ctx context.Context, files []*walk.File) erro return fmt.Errorf("no formatter for path: %s", file.RelPath) } - log.Logf(c.unmatchedLevel, "no formatter for path: %s", file.RelPath) + c.log.Logf(c.unmatchedLevel, "no formatter for path: %s", file.RelPath) // no further processing to be done, append to the release list toRelease = append(toRelease, file) @@ -239,92 +243,6 @@ func (c *CompositeFormatter) Apply(ctx context.Context, files []*walk.File) erro return nil } -// BustCache compares the currently configured formatters with their respective entries in the db. -// If a formatter was added, removed or modified, we clear any path entries from the cache, ensuring that all paths -// get formatted with the most recent formatter set. -func (c *CompositeFormatter) BustCache(db *bolt.DB) error { - return db.Update(func(tx *bolt.Tx) error { - clearPaths := false - - pathsBucket, err := cache.BucketPaths(tx) - if err != nil { - return fmt.Errorf("failed to get paths bucket from cache: %w", err) - } - - formattersBucket, err := cache.BucketFormatters(tx) - if err != nil { - return fmt.Errorf("failed to get formatters bucket from cache: %w", err) - } - - // check for any newly configured or modified formatters - for name, formatter := range c.formatters { - stat, err := os.Lstat(formatter.Executable()) - if err != nil { - return fmt.Errorf("failed to stat formatter executable %v: %w", formatter.Executable(), err) - } - - entry, err := formattersBucket.Get(name) - if !(err == nil || errors.Is(err, cache.ErrKeyNotFound)) { - return fmt.Errorf("failed to retrieve cache entry for formatter %v: %w", name, err) - } - - isNew := errors.Is(err, cache.ErrKeyNotFound) - hasChanged := !(isNew || (entry.Size == stat.Size() && entry.Modified == stat.ModTime())) - - if isNew { - log.Debugf("formatter '%s' is new", name) - } else if hasChanged { - log.Debug("formatter '%s' has changed", - name, - "size", stat.Size(), - "modTime", stat.ModTime(), - "cachedSize", entry.Size, - "cachedModTime", entry.Modified, - ) - } - - // update overall flag - clearPaths = clearPaths || isNew || hasChanged - - // record formatters info - entry = &cache.Entry{ - Size: stat.Size(), - Modified: stat.ModTime(), - } - - if err = formattersBucket.Put(name, entry); err != nil { - return fmt.Errorf("failed to write cache entry for formatter %v: %w", name, err) - } - } - - // check for any removed formatters - if err = formattersBucket.ForEach(func(key string, _ *cache.Entry) error { - _, ok := c.formatters[key] - if !ok { - // remove the formatter entry from the cache - if err = formattersBucket.Delete(key); err != nil { - return fmt.Errorf("failed to remove cache entry for formatter %v: %w", key, err) - } - // indicate a clean is required - clearPaths = true - } - - return nil - }); err != nil { - return fmt.Errorf("failed to check cache for removed formatters: %w", err) - } - - if clearPaths { - // remove all path entries - if err := pathsBucket.DeleteAll(); err != nil { - return fmt.Errorf("failed to remove all path entries from cache: %w", err) - } - } - - return nil - }) -} - // Close finalizes the processing of the CompositeFormatter, ensuring that any remaining batches are applied and // all formatters have completed their tasks. It returns an error if any formatting failures were detected. func (c *CompositeFormatter) Close(ctx context.Context) error { @@ -345,6 +263,84 @@ func (c *CompositeFormatter) Close(ctx context.Context) error { return nil } +// Hash takes anything that might affect the paths to be traversed, or how they are traversed, and adds it to a sha256 +// hash. +// This can be used to determine if there has been a material change in config or setup that requires the cache +// to be invalidated. +func (c *CompositeFormatter) Hash() (string, error) { + h := sha256.New() + + // start with the global excludes + h.Write([]byte(strings.Join(c.cfg.Excludes, " "))) + h.Write([]byte(strings.Join(c.cfg.Global.Excludes, " "))) + + // sort formatters determinstically + formatters := make([]*Formatter, 0, len(c.formatters)) + for _, f := range c.formatters { + formatters = append(formatters, f) + } + + slices.SortFunc(formatters, formatterSortFunc) + + // apply them to the hash + for _, f := range formatters { + if err := f.Hash(h); err != nil { + return "", fmt.Errorf("failed to hash formatter: %w", err) + } + } + + // finalize + return hex.EncodeToString(h.Sum(nil)), nil +} + +// BustCache compares the current Hash() of this formatter with the last entry stored in the db. +// If it has changed, we remove all the path entries, forcing a fresh cache state. +func (c *CompositeFormatter) BustCache(db *bolt.DB) error { + // determine current hash + currentHash, err := c.Hash() + if err != nil { + return fmt.Errorf("failed to hash formatter: %w", err) + } + + hashKey := []byte("sha256") + bucketKey := []byte("formatters") + + return db.Update(func(tx *bolt.Tx) error { + // get or create the formatters bucket + bucket, err := tx.CreateBucketIfNotExists(bucketKey) + if err != nil { + return fmt.Errorf("failed to create formatters bucket: %w", err) + } + + // load the previous hash which might be nil + prevHash := bucket.Get(hashKey) + + c.log.Debug( + "comparising formatter hash with db", + "prev_hash", string(prevHash), + "current_hash", currentHash, + ) + + // compare with the previous hash + // if they are different, delete all the path entries + if string(prevHash) != currentHash { + c.log.Debug("hash has changed, deleting all paths") + + paths, err := cache.BucketPaths(tx) + if err != nil { + return fmt.Errorf("failed to get paths bucket from cache: %w", err) + } + + if err = paths.DeleteAll(); err != nil { + return fmt.Errorf("failed to delete paths bucket: %w", err) + } + } + + // save the latest hash + return bucket.Put(hashKey, []byte(currentHash)) + }) +} + func NewCompositeFormatter( cfg *config.Config, statz *stats.Stats, @@ -394,14 +390,18 @@ func NewCompositeFormatter( eg.SetLimit(runtime.NumCPU()) return &CompositeFormatter{ + cfg: cfg, stats: statz, batchSize: batchSize, globalExcludes: globalExcludes, + + log: log.WithPrefix("composite-formatter"), changeLevel: changeLevel, unmatchedLevel: unmatchedLevel, - formatters: formatters, - eg: &eg, - batches: make(batchMap), - formatError: new(atomic.Bool), + + eg: &eg, + formatters: formatters, + batches: make(batchMap), + formatError: new(atomic.Bool), }, nil } diff --git a/format/formatter.go b/format/formatter.go index 5254565d..a3adda5b 100644 --- a/format/formatter.go +++ b/format/formatter.go @@ -4,9 +4,11 @@ import ( "context" "errors" "fmt" + "hash" "os" "os/exec" "regexp" + "strings" "time" "github.com/charmbracelet/log" @@ -52,6 +54,28 @@ func (f *Formatter) Executable() string { return f.executable } +// Hash adds this formatter's config and executable info to the config hash being created. +func (f *Formatter) Hash(h hash.Hash) error { + h.Write([]byte(f.name)) + h.Write([]byte(strings.Join(f.config.Options, " "))) + h.Write([]byte(strings.Join(f.config.Includes, " "))) + h.Write([]byte(strings.Join(f.config.Excludes, " "))) + h.Write([]byte(fmt.Sprintf("%d", f.config.Priority))) + + // stat the formatter's executable + info, err := os.Lstat(f.executable) + if err != nil { + return fmt.Errorf("failed to stat formatter executable: %w", err) + } + + // include the executable's size and mod time + // if the formatter executable changes (e.g. new version) we should invalidate the cache + h.Write([]byte(fmt.Sprintf("%d", info.Size()))) + h.Write([]byte(fmt.Sprintf("%d", info.ModTime().Unix()))) + + return nil +} + func (f *Formatter) Apply(ctx context.Context, files []*walk.File) error { start := time.Now() diff --git a/format/formatter_test.go b/format/formatter_test.go index 795ed4f2..d3b617d1 100644 --- a/format/formatter_test.go +++ b/format/formatter_test.go @@ -1,11 +1,16 @@ package format_test import ( + "os" + "os/exec" + "path/filepath" "testing" + "time" "github.com/numtide/treefmt/config" "github.com/numtide/treefmt/format" "github.com/numtide/treefmt/stats" + "github.com/numtide/treefmt/test" "github.com/stretchr/testify/require" ) @@ -47,3 +52,227 @@ func TestInvalidFormatterName(t *testing.T) { as.ErrorIs(err, format.ErrInvalidName) } } + +func TestFormatterHash(t *testing.T) { + as := require.New(t) + + const batchSize = 1024 + + statz := stats.New() + + tempDir := t.TempDir() + + // symlink some formatters into temp dir, so we can mess with their mod times + binPath := filepath.Join(tempDir, "bin") + as.NoError(os.Mkdir(binPath, 0o755)) + + binaries := []string{"black", "elm-format", "gofmt"} + + for _, name := range binaries { + src, err := exec.LookPath(name) + as.NoError(err) + as.NoError(os.Symlink(src, filepath.Join(binPath, name))) + } + + // prepend our test bin directory to PATH + t.Setenv("PATH", binPath+":"+os.Getenv("PATH")) + + // start with 2 formatters + cfg := &config.Config{ + OnUnmatched: "info", + FormatterConfigs: map[string]*config.Formatter{ + "python": { + Command: "black", + Includes: []string{"*.py"}, + }, + "elm": { + Command: "elm-format", + Options: []string{"--yes"}, + Includes: []string{"*.elm"}, + }, + }, + } + + f, err := format.NewCompositeFormatter(cfg, &statz, batchSize) + as.NoError(err) + + // hash for the first time + h, err := f.Hash() + as.NoError(err) + + // hash again without making changes + h2, err := f.Hash() + as.NoError(err) + as.Equal(h, h2, "hash should not have changed") + + t.Run("change formatter mod time", func(t *testing.T) { + for _, name := range binaries { + // tweak mod time + newTime := time.Now().Add(-time.Minute) + as.NoError(test.Lutimes(t, filepath.Join(binPath, name), newTime, newTime)) + + h3, err := f.Hash() + as.NoError(err) + as.NotEqual(h2, h3, "hash should have changed") + + // hash again without making changes + h4, err := f.Hash() + as.NoError(err) + as.Equal(h3, h4, "hash should not have changed") + } + }) + + t.Run("modify formatter options", func(_ *testing.T) { + f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + as.NoError(err) + + h3, err := f.Hash() + as.NoError(err) + + // adjust python includes + python := cfg.FormatterConfigs["python"] + python.Includes = []string{"*.py", "*.pyi"} + + f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + as.NoError(err) + + h4, err := f.Hash() + as.NoError(err) + as.NotEqual(h3, h4, "hash should have changed") + + // hash again without making changes + h5, err := f.Hash() + as.NoError(err) + as.Equal(h4, h5, "hash should not have changed") + + // adjust python excludes + python.Excludes = []string{"*.pyi"} + + f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + as.NoError(err) + + h6, err := f.Hash() + as.NoError(err) + as.NotEqual(h5, h6, "hash should have changed") + + // hash again without making changes + h7, err := f.Hash() + as.NoError(err) + as.Equal(h6, h7, "hash should not have changed") + + // adjust python options + python.Options = []string{"-w", "-s"} + + f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + as.NoError(err) + + h8, err := f.Hash() + as.NoError(err) + as.NotEqual(h7, h8, "hash should have changed") + + // hash again without making changes + h9, err := f.Hash() + as.NoError(err) + as.Equal(h8, h9, "hash should not have changed") + + // adjust python priority + python.Priority = 100 + + f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + as.NoError(err) + + h10, err := f.Hash() + as.NoError(err) + as.NotEqual(h9, h10, "hash should have changed") + + // hash again without making changes + h11, err := f.Hash() + as.NoError(err) + as.Equal(h10, h11, "hash should not have changed") + }) + + t.Run("add/remove formatters", func(_ *testing.T) { + cfg.FormatterConfigs["go"] = &config.Formatter{ + Command: "gofmt", + Options: []string{"-w"}, + Includes: []string{"*.go"}, + } + + f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + as.NoError(err) + + h3, err := f.Hash() + as.NoError(err) + as.NotEqual(h2, h3, "hash should have changed") + + // remove python formatter + delete(cfg.FormatterConfigs, "python") + + f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + as.NoError(err) + + h4, err := f.Hash() + as.NoError(err) + as.NotEqual(h3, h4, "hash should have changed") + + // hash again without making changes + h5, err := f.Hash() + as.NoError(err) + as.Equal(h4, h5, "hash should not have changed") + + // remove elm formatter + delete(cfg.FormatterConfigs, "elm") + + f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + as.NoError(err) + + h6, err := f.Hash() + as.NoError(err) + as.NotEqual(h5, h6, "hash should have changed") + + // hash again without making changes + h7, err := f.Hash() + as.NoError(err) + as.Equal(h6, h7, "hash should not have changed") + }) + + t.Run("modify global excludes", func(_ *testing.T) { + cfg.Excludes = []string{"*.go"} + + f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + as.NoError(err) + + h3, err := f.Hash() + as.NoError(err) + + cfg.Excludes = []string{"*.go", "*.hs"} + + f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + as.NoError(err) + + h4, err := f.Hash() + as.NoError(err) + as.NotEqual(h3, h4, "hash should have changed") + + // hash again without making changes + h5, err := f.Hash() + as.NoError(err) + as.Equal(h4, h5, "hash should not have changed") + + // test deprecated global excludes + cfg.Excludes = nil + cfg.Global.Excludes = []string{"*.go", "*.hs"} + + f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + as.NoError(err) + + h6, err := f.Hash() + as.NoError(err) + as.Equal(h4, h6, "Global.Excludes should produce same hash with same values as Excludes") + + // hash again without making changes + h7, err := f.Hash() + as.NoError(err) + as.Equal(h6, h7, "hash should not have changed") + }) +} diff --git a/test/temp.go b/test/test.go similarity index 73% rename from test/temp.go rename to test/test.go index 274cb1dc..1b98f486 100644 --- a/test/temp.go +++ b/test/test.go @@ -1,6 +1,7 @@ package test import ( + "fmt" "os" "testing" "time" @@ -9,6 +10,7 @@ import ( "github.com/numtide/treefmt/config" cp "github.com/otiai10/copy" "github.com/stretchr/testify/require" + "golang.org/x/sys/unix" ) func WriteConfig(t *testing.T, path string, cfg *config.Config) { @@ -60,13 +62,20 @@ func TempFile(t *testing.T, dir string, pattern string, contents *string) *os.Fi return file } -func RecreateSymlink(t *testing.T, path string) error { +// Lutimes is a convenience wrapper for using unix.Lutimes +// TODO: this will need adapted if we support Windows. +func Lutimes(t *testing.T, path string, atime time.Time, mtime time.Time) error { t.Helper() - src, err := os.Readlink(path) + var utimes [2]unix.Timeval + utimes[0] = unix.NsecToTimeval(atime.UnixNano()) + utimes[1] = unix.NsecToTimeval(mtime.UnixNano()) - require.NoError(t, err, "failed to read symlink") - require.NoError(t, os.Remove(path), "failed to remove symlink") + // Change the timestamps of the symlink itself + err := unix.Lutimes(path, utimes[0:]) + if err != nil { + return fmt.Errorf("failed to change times: %w", err) + } - return os.Symlink(src, path) + return nil } diff --git a/walk/cache/bucket.go b/walk/cache/bucket.go index dd2334e0..16221305 100644 --- a/walk/cache/bucket.go +++ b/walk/cache/bucket.go @@ -76,10 +76,6 @@ func BucketPaths(tx *bolt.Tx) (*Bucket[Entry], error) { return cacheBucket(bucketPaths, tx) } -func BucketFormatters(tx *bolt.Tx) (*Bucket[Entry], error) { - return cacheBucket(bucketFormatters, tx) -} - func cacheBucket(name string, tx *bolt.Tx) (*Bucket[Entry], error) { var ( err error diff --git a/walk/cache/cache.go b/walk/cache/cache.go index fba706f0..ba18af5d 100644 --- a/walk/cache/cache.go +++ b/walk/cache/cache.go @@ -48,11 +48,7 @@ func Open(root string) (*bolt.DB, error) { func EnsureBuckets(db *bolt.DB) error { // force creation of buckets if they don't already exist return db.Update(func(tx *bolt.Tx) error { - if _, err := BucketPaths(tx); err != nil { - return err - } - - _, err := BucketFormatters(tx) + _, err := BucketPaths(tx) return err }) From b9c056893fc12e24e09039763e7065dd5d810d01 Mon Sep 17 00:00:00 2001 From: Brian McGee Date: Thu, 17 Oct 2024 12:24:45 +0100 Subject: [PATCH 2/3] chore: remove unused code Signed-off-by: Brian McGee --- format/task.go | 45 --------------------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 format/task.go diff --git a/format/task.go b/format/task.go deleted file mode 100644 index 922dc6db..00000000 --- a/format/task.go +++ /dev/null @@ -1,45 +0,0 @@ -package format - -import ( - "cmp" - "slices" - - "github.com/numtide/treefmt/walk" -) - -type Task struct { - File *walk.File - Formatters []*Formatter - BatchKey string - Errors []error -} - -func NewTask(file *walk.File, formatters []*Formatter) Task { - // sort by priority in ascending order - slices.SortFunc(formatters, func(a, b *Formatter) int { - priorityA := a.Priority() - priorityB := b.Priority() - - result := priorityA - priorityB - if result == 0 { - // formatters with the same priority are sorted lexicographically to ensure a deterministic outcome - result = cmp.Compare(a.Name(), b.Name()) - } - - return result - }) - - // construct a batch key which represents the unique sequence of formatters to be applied to file - var key string - for _, f := range formatters { - key += f.name + ":" - } - - key = key[:len(key)-1] - - return Task{ - File: file, - Formatters: formatters, - BatchKey: key, - } -} From fd5996948aba75fc78063609d556c41f58d393c1 Mon Sep 17 00:00:00 2001 From: Brian McGee Date: Thu, 17 Oct 2024 12:52:08 +0100 Subject: [PATCH 3/3] feat: configurable batch size Signed-off-by: Brian McGee --- cmd/format/format.go | 8 ++------ cmd/root_test.go | 35 +++++++++++++++++++++++++++++++++++ config/config.go | 22 ++++++++++++++++++++++ format/composite.go | 3 +-- format/formatter_test.go | 35 +++++++++++++++-------------------- 5 files changed, 75 insertions(+), 28 deletions(-) diff --git a/cmd/format/format.go b/cmd/format/format.go index 40dcc094..1012efd3 100644 --- a/cmd/format/format.go +++ b/cmd/format/format.go @@ -24,10 +24,6 @@ import ( bolt "go.etcd.io/bbolt" ) -const ( - BatchSize = 1024 -) - var ErrFailOnChange = errors.New("unexpected changes detected, --fail-on-change is enabled") func Run(v *viper.Viper, statz *stats.Stats, cmd *cobra.Command, paths []string) error { @@ -156,7 +152,7 @@ func Run(v *viper.Viper, statz *stats.Stats, cmd *cobra.Command, paths []string) } // create a composite formatter which will handle applying the correct formatters to each file we traverse - formatter, err := format.NewCompositeFormatter(cfg, statz, BatchSize) + formatter, err := format.NewCompositeFormatter(cfg, statz) if err != nil { return fmt.Errorf("failed to create composite formatter: %w", err) } @@ -175,7 +171,7 @@ func Run(v *viper.Viper, statz *stats.Stats, cmd *cobra.Command, paths []string) } // start traversing - files := make([]*walk.File, BatchSize) + files := make([]*walk.File, cfg.BatchSize) for { // read the next batch diff --git a/cmd/root_test.go b/cmd/root_test.go index f7de2f9f..281907c2 100644 --- a/cmd/root_test.go +++ b/cmd/root_test.go @@ -112,6 +112,41 @@ func TestCpuProfile(t *testing.T) { as.NoError(err) } +func TestBatchSize(t *testing.T) { + as := require.New(t) + tempDir := test.TempExamples(t) + + // capture current cwd, so we can replace it after the test is finished + cwd, err := os.Getwd() + as.NoError(err) + + t.Cleanup(func() { + // return to the previous working directory + as.NoError(os.Chdir(cwd)) + }) + + _, _, err = treefmt(t, "-C", tempDir, "--allow-missing-formatter", "--batch-size", "10241") + as.ErrorIs(err, config.ErrInvalidBatchSize) + + _, _, err = treefmt(t, "-C", tempDir, "--allow-missing-formatter", "--batch-size", "-10241") + as.ErrorContains(err, "invalid argument") + + _, _, err = treefmt(t, "-C", tempDir, "--allow-missing-formatter") + as.NoError(err) + + out, _, err := treefmt(t, "-C", tempDir, "--allow-missing-formatter", "--batch-size", "1", "-v") + as.NoError(err) + as.Contains(string(out), fmt.Sprintf("INFO config: batch size = %d", 1)) + + out, _, err = treefmt(t, "-C", tempDir, "--allow-missing-formatter", "--batch-size", "4", "-v") + as.NoError(err) + as.Contains(string(out), fmt.Sprintf("INFO config: batch size = %d", 4)) + + out, _, err = treefmt(t, "-C", tempDir, "--allow-missing-formatter", "--batch-size", "128", "-v") + as.NoError(err) + as.Contains(string(out), fmt.Sprintf("INFO config: batch size = %d", 128)) +} + func TestAllowMissingFormatter(t *testing.T) { as := require.New(t) diff --git a/config/config.go b/config/config.go index 906e9030..b0f479b7 100644 --- a/config/config.go +++ b/config/config.go @@ -6,14 +6,18 @@ import ( "path/filepath" "strings" + "github.com/charmbracelet/log" "github.com/numtide/treefmt/walk" "github.com/spf13/pflag" "github.com/spf13/viper" ) +var ErrInvalidBatchSize = fmt.Errorf("batch size must be between 1 and 10,240") + // Config is used to represent the list of configured Formatters. type Config struct { AllowMissingFormatter bool `mapstructure:"allow-missing-formatter" toml:"allow-missing-formatter,omitempty"` + BatchSize int `mapstructure:"batch-size" toml:"batch-size,omitempty"` CI bool `mapstructure:"ci" toml:"ci,omitempty"` ClearCache bool `mapstructure:"clear-cache" toml:"-"` // not allowed in config CPUProfile string `mapstructure:"cpu-profile" toml:"cpu-profile,omitempty"` @@ -59,6 +63,9 @@ func SetFlags(fs *pflag.FlagSet) { "allow-missing-formatter", false, "Do not exit with error if a configured formatter is missing. (env $TREEFMT_ALLOW_MISSING_FORMATTER)", ) + fs.Uint("batch-size", 1024, + "The maximum number of files to pass to a formatter at once. (env $TREEFMT_BATCH_SIZE)", + ) fs.Bool( "ci", false, "Runs treefmt in a CI mode, enabling --no-cache, --fail-on-change and adjusting some other settings "+ @@ -235,6 +242,21 @@ func FromViper(v *viper.Viper) (*Config, error) { } } + // validate batch size + // todo what is a reasonable upper limit on this? + + // default if it isn't set (e.g. in tests when using Config directly) + if cfg.BatchSize == 0 { + cfg.BatchSize = 1024 + } + + if !(1 <= cfg.BatchSize && cfg.BatchSize <= 10240) { + return nil, ErrInvalidBatchSize + } + + l := log.WithPrefix("config") + l.Infof("batch size = %d", cfg.BatchSize) + return cfg, nil } diff --git a/format/composite.go b/format/composite.go index a1313830..b418c8e6 100644 --- a/format/composite.go +++ b/format/composite.go @@ -344,7 +344,6 @@ func (c *CompositeFormatter) BustCache(db *bolt.DB) error { func NewCompositeFormatter( cfg *config.Config, statz *stats.Stats, - batchSize int, ) (*CompositeFormatter, error) { // compile global exclude globs globalExcludes, err := compileGlobs(cfg.Excludes) @@ -392,7 +391,7 @@ func NewCompositeFormatter( return &CompositeFormatter{ cfg: cfg, stats: statz, - batchSize: batchSize, + batchSize: cfg.BatchSize, globalExcludes: globalExcludes, log: log.WithPrefix("composite-formatter"), diff --git a/format/formatter_test.go b/format/formatter_test.go index d3b617d1..c71eac3a 100644 --- a/format/formatter_test.go +++ b/format/formatter_test.go @@ -17,15 +17,13 @@ import ( func TestInvalidFormatterName(t *testing.T) { as := require.New(t) - const batchSize = 1024 - cfg := &config.Config{} cfg.OnUnmatched = "info" statz := stats.New() // simple "empty" config - _, err := format.NewCompositeFormatter(cfg, &statz, batchSize) + _, err := format.NewCompositeFormatter(cfg, &statz) as.NoError(err) // valid name using all the acceptable characters @@ -35,7 +33,7 @@ func TestInvalidFormatterName(t *testing.T) { }, } - _, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + _, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) // test with some bad examples @@ -48,7 +46,7 @@ func TestInvalidFormatterName(t *testing.T) { }, } - _, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + _, err = format.NewCompositeFormatter(cfg, &statz) as.ErrorIs(err, format.ErrInvalidName) } } @@ -56,10 +54,7 @@ func TestInvalidFormatterName(t *testing.T) { func TestFormatterHash(t *testing.T) { as := require.New(t) - const batchSize = 1024 - statz := stats.New() - tempDir := t.TempDir() // symlink some formatters into temp dir, so we can mess with their mod times @@ -93,7 +88,7 @@ func TestFormatterHash(t *testing.T) { }, } - f, err := format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err := format.NewCompositeFormatter(cfg, &statz) as.NoError(err) // hash for the first time @@ -123,7 +118,7 @@ func TestFormatterHash(t *testing.T) { }) t.Run("modify formatter options", func(_ *testing.T) { - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h3, err := f.Hash() @@ -133,7 +128,7 @@ func TestFormatterHash(t *testing.T) { python := cfg.FormatterConfigs["python"] python.Includes = []string{"*.py", "*.pyi"} - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h4, err := f.Hash() @@ -148,7 +143,7 @@ func TestFormatterHash(t *testing.T) { // adjust python excludes python.Excludes = []string{"*.pyi"} - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h6, err := f.Hash() @@ -163,7 +158,7 @@ func TestFormatterHash(t *testing.T) { // adjust python options python.Options = []string{"-w", "-s"} - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h8, err := f.Hash() @@ -178,7 +173,7 @@ func TestFormatterHash(t *testing.T) { // adjust python priority python.Priority = 100 - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h10, err := f.Hash() @@ -198,7 +193,7 @@ func TestFormatterHash(t *testing.T) { Includes: []string{"*.go"}, } - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h3, err := f.Hash() @@ -208,7 +203,7 @@ func TestFormatterHash(t *testing.T) { // remove python formatter delete(cfg.FormatterConfigs, "python") - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h4, err := f.Hash() @@ -223,7 +218,7 @@ func TestFormatterHash(t *testing.T) { // remove elm formatter delete(cfg.FormatterConfigs, "elm") - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h6, err := f.Hash() @@ -239,7 +234,7 @@ func TestFormatterHash(t *testing.T) { t.Run("modify global excludes", func(_ *testing.T) { cfg.Excludes = []string{"*.go"} - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h3, err := f.Hash() @@ -247,7 +242,7 @@ func TestFormatterHash(t *testing.T) { cfg.Excludes = []string{"*.go", "*.hs"} - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h4, err := f.Hash() @@ -263,7 +258,7 @@ func TestFormatterHash(t *testing.T) { cfg.Excludes = nil cfg.Global.Excludes = []string{"*.go", "*.hs"} - f, err = format.NewCompositeFormatter(cfg, &statz, batchSize) + f, err = format.NewCompositeFormatter(cfg, &statz) as.NoError(err) h6, err := f.Hash()