Skip to content

Commit e64e83e

Browse files
authored
Removes old, empty directories from datatype root directories (#99)
* Removes old, empty directories Pusher did not remove old directories. Every extant directory creates an inotify watch in pusher. Over time and/or for experiments (e.g., wehe) that create a large number of directories, this can create a very large number of useless watches, which eat up memory, and eventually may run up against fs.inotify.max_user_watches, causing other containers on the system to fail. This commit also fixes an apparent variable name bug in which the value for maxFileAge was put into a variable named minFileAge. * Removes old, empty directories when walking dir Previously, directories were not touched. On long-running containers, directories would begin to pile up, causing a large number of useless pusher inotify directory watches. * Uses Go v1.18 to build Docker image, and Alpine 3.15 * Updates Go modules * Reverts prometheus/client_golang to v1.11.0 I was running into this with newer versions: prometheus/prometheus#10574 * Updates Travis to use Go v1.18 * Reverts Go to v1.17 (from v1.18) The travis build was failing with something like this: mattn/goveralls#207 ... and the Docker build was failing with this error: Step 8/10 : COPY --from=build /go/bin/pusher / COPY failed: stat go/bin/pusher: file does not exist * Adds a clarifying comment about removing dirs * Simplifies dir removal logic, removes branching
1 parent 174abf0 commit e64e83e

File tree

6 files changed

+573
-103
lines changed

6 files changed

+573
-103
lines changed

.travis.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
language: go
22

33
go:
4-
- "1.13"
4+
- "1.17"
55

66
services:
77
- docker

Dockerfile

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM golang:1.13 as build
1+
FROM golang:1.17 as build
22
# Add the local files to be sure we are building the local source code instead
33
# of downloading from GitHub.
44
# Don't add any of the other libraries, because we live at HEAD.
@@ -13,7 +13,7 @@ RUN go get \
1313
github.com/m-lab/pusher
1414

1515
# Now copy the built binary into a minimal base image.
16-
FROM alpine:3.7
16+
FROM alpine:3.15
1717
# By default, alpine has no root certs. Add them so pusher can use PKI to
1818
# verify that Google Cloud Storage is actually Google Cloud Storage.
1919
RUN apk add --no-cache ca-certificates

finder/findfiles.go

+50-3
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ package finder
1515

1616
import (
1717
"context"
18+
"io"
1819
"log"
1920
"os"
2021
"path/filepath"
@@ -27,6 +28,11 @@ import (
2728
"github.com/prometheus/client_golang/prometheus/promauto"
2829
)
2930

31+
// The minimum age of a directory before it will be considered for removal, if
32+
// it is also empty. 25h should ensure that the current day's directory is never
33+
// removed prematurely.
34+
const minDirectoryAge time.Duration = 25 * time.Hour
35+
3036
// Set up the prometheus metrics.
3137
var (
3238
pusherFinderRuns = promauto.NewCounter(prometheus.CounterOpts{
@@ -52,20 +58,22 @@ var (
5258

5359
// findFiles recursively searches through a given directory to find all the files which are old enough to be eligible for upload.
5460
// The list of files returned is sorted by mtime.
55-
func findFiles(datatype string, directory filename.System, minFileAge time.Duration) []filename.System {
61+
func findFiles(datatype string, directory filename.System, maxFileAge time.Duration) []filename.System {
5662
// Give an initial capacity to the slice. 1024 chosen because it's a nice round number.
5763
// TODO: Choose a better default.
5864
eligibleFiles := make(map[filename.System]os.FileInfo)
59-
eligibleTime := time.Now().Add(-minFileAge)
65+
eligibleTime := time.Now().Add(-maxFileAge)
6066
totalEligibleSize := int64(0)
6167

6268
err := filepath.Walk(string(directory), func(path string, info os.FileInfo, err error) error {
6369
if err != nil {
6470
// Any error terminates the walk.
6571
return err
6672
}
73+
// Check whether a directory is very old and empty, and remove it if so.
6774
if info.IsDir() {
68-
return nil
75+
err = checkDirectory(datatype, path, info.ModTime())
76+
return err
6977
}
7078
if eligibleTime.After(info.ModTime()) {
7179
eligibleFiles[filename.System(path)] = info
@@ -100,6 +108,45 @@ func findFiles(datatype string, directory filename.System, minFileAge time.Durat
100108
return fileList
101109
}
102110

111+
// checkDirectory checks to see if a directory is sufficiently old and empty.
112+
// If so, it removes the directory from the filesystem to prevent old, empty
113+
// directories from piling up in the filesystem.
114+
func checkDirectory(datatype string, path string, mTime time.Time) error {
115+
// Do not delete the root datatype directory.
116+
if datatype == filepath.Base(path) {
117+
return nil
118+
}
119+
// Do nothing if the directory is less than constant minDirectoryAge. This
120+
// could probably be more aggressive.
121+
eligibleTime := time.Now().Add(-minDirectoryAge)
122+
if mTime.After(eligibleTime) {
123+
return nil
124+
}
125+
f, err := os.Open(path)
126+
if err != nil {
127+
return err
128+
}
129+
defer f.Close()
130+
// Read the contents of the directory, looking only as far as the first file
131+
// found. We don't care how many files there are, only that at least one
132+
// exists. An error of type io.EOF indicates an empty directory.
133+
// https://pkg.go.dev/os#File.Readdirnames
134+
// https://stackoverflow.com/a/30708914
135+
// Implementation note: we are using Readdirnames() instead of Readdir()
136+
// because the former does not stat each file, but only returns file names,
137+
// which is more efficient for our use case.
138+
_, err = f.Readdirnames(1)
139+
if err != io.EOF {
140+
return err
141+
}
142+
err = os.Remove(path)
143+
if err != nil {
144+
return err
145+
}
146+
log.Printf("Removed old, empty directory %s.", path)
147+
return nil
148+
}
149+
103150
// FindForever repeatedly runs FindFiles until its context is canceled.
104151
//
105152
// It randomizes the inter-`find` sleep time in an effort to avoid thundering

finder/findfiles_test.go

+37-5
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package finder_test
22

33
import (
44
"context"
5+
"errors"
6+
"io/fs"
57
"io/ioutil"
68
"os"
79
"testing"
@@ -25,6 +27,21 @@ func TestFindForever(t *testing.T) {
2527
rtx.Must(ioutil.WriteFile(tempdir+"/next_oldest_file", []byte("moredata\n"), 0644), "WriteFile failed")
2628
newtime = time.Now().Add(time.Duration(-12) * time.Hour)
2729
rtx.Must(os.Chtimes(tempdir+"/next_oldest_file", newtime, newtime), "Chtimes failed")
30+
// Set up the directories.
31+
//
32+
// An old, empty directory.
33+
rtx.Must(os.Mkdir(tempdir+"/old_empty_dir", 0750), "Mkdir failed")
34+
newtime = time.Now().Add(time.Duration(-26) * time.Hour)
35+
rtx.Must(os.Chtimes(tempdir+"/old_empty_dir", newtime, newtime), "Chtimes failed")
36+
// An old directory, but not empty.
37+
rtx.Must(os.Mkdir(tempdir+"/old_not_empty_dir", 0750), "Mkdir failed")
38+
newtime = time.Now().Add(time.Duration(-30) * time.Hour)
39+
rtx.Must(os.Chtimes(tempdir+"/old_not_empty_dir", newtime, newtime), "Chtimes failed")
40+
rtx.Must(ioutil.WriteFile(tempdir+"/old_not_empty_dir/test_file", []byte("data\n"), 0644), "WriteFile failed")
41+
newtime = time.Now().Add(time.Duration(-27) * time.Hour)
42+
rtx.Must(os.Chtimes(tempdir+"/old_not_empty_dir/test_file", newtime, newtime), "Chtimes failed")
43+
// A new directory.
44+
rtx.Must(os.Mkdir(tempdir+"/new_dir", 0750), "Mkdir failed")
2845
// Set up the receiver channel.
2946
foundFiles := make(chan filename.System)
3047
ctx, cancel := context.WithCancel(context.Background())
@@ -38,15 +55,30 @@ func TestFindForever(t *testing.T) {
3855
localfiles := []filename.System{
3956
<-foundFiles,
4057
<-foundFiles,
58+
<-foundFiles,
4159
}
42-
if len(localfiles) != 2 {
60+
// Test files.
61+
if len(localfiles) != 3 {
4362
t.Errorf("len(localfiles) (%d) != 2", len(localfiles))
4463
}
45-
if string(localfiles[0]) != tempdir+"/oldest_file" {
46-
t.Errorf("wrong name[0]: %s", localfiles[0])
64+
if string(localfiles[0]) != tempdir+"/old_not_empty_dir/test_file" {
65+
t.Errorf("wrong name[1]: %s", localfiles[0])
66+
}
67+
if string(localfiles[1]) != tempdir+"/oldest_file" {
68+
t.Errorf("wrong name[1]: %s", localfiles[0])
69+
}
70+
if string(localfiles[2]) != tempdir+"/next_oldest_file" {
71+
t.Errorf("wrong name[2]: %s", localfiles[1])
72+
}
73+
// Test directories.
74+
if _, err = os.Stat(tempdir + "/old_empty_dir"); errors.Is(err, fs.ErrExist) {
75+
t.Errorf("Directory %s/old_empty_dir exists, but shouldn't", tempdir)
76+
}
77+
if _, err = os.Stat(tempdir + "/old_not_empty_dir"); errors.Is(err, fs.ErrNotExist) {
78+
t.Errorf("Directory %s/old_not_empty_dir does not exist, but should", tempdir)
4779
}
48-
if string(localfiles[1]) != tempdir+"/next_oldest_file" {
49-
t.Errorf("wrong name[1]: %s", localfiles[1])
80+
if _, err = os.Stat(tempdir + "/new_dir"); errors.Is(err, fs.ErrNotExist) {
81+
t.Errorf("Directory %s/new_dir does not exist, but should", tempdir)
5082
}
5183
}
5284

go.mod

+15-8
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,20 @@ module github.com/m-lab/pusher
33
go 1.13
44

55
require (
6-
cloud.google.com/go/storage v1.6.0
7-
github.com/go-test/deep v1.0.5 // indirect
8-
github.com/googleapis/google-cloud-go-testing v0.0.0-20191008195207-8e1d251e947d
9-
github.com/m-lab/go v1.3.0
10-
github.com/prometheus/client_golang v1.3.0
6+
cloud.google.com/go v0.101.1 // indirect
7+
cloud.google.com/go/storage v1.22.0
8+
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de // indirect
9+
github.com/cespare/xxhash/v2 v2.1.2 // indirect
10+
github.com/go-kit/log v0.2.0 // indirect
11+
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
12+
github.com/googleapis/google-cloud-go-testing v0.0.0-20210719221736-1c9a4c676720
13+
github.com/json-iterator/go v1.1.12 // indirect
14+
github.com/m-lab/go v0.1.47
15+
github.com/prometheus/client_golang v1.11.0
16+
github.com/prometheus/common v0.32.1 // indirect
17+
github.com/prometheus/procfs v0.7.3 // indirect
1118
github.com/rjeczalik/notify v0.9.2
12-
golang.org/x/net v0.0.0-20200222125558-5a598a2470a0
13-
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae
14-
google.golang.org/api v0.21.0
19+
golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4
20+
golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6
21+
google.golang.org/api v0.79.0
1522
)

0 commit comments

Comments
 (0)