Skip to content

Commit 4e90c72

Browse files
author
Felix Breidenstein
committed
Implement local filecaching
1 parent 9a48dc3 commit 4e90c72

File tree

6 files changed

+275
-16
lines changed

6 files changed

+275
-16
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
s3-http-proxy
22
dist/
3+
cache/

README.md

+51-4
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,62 @@
22

33
Little proxy to access a private S3 bucket via HTTP.
44

5-
## Usage
5+
6+
## Use case
7+
When your application stores its assets in an S3 bucket and you use e.g.
8+
CloudFront, you can improve performance by configuring the bucket as an origin
9+
and pointing a custom behaviour like '/assets' to the bucket. This way, the assets
10+
get directly served from the bucket without shoving the request through your
11+
application stack. This also works perfectly for private buckets because
12+
CloudFront can use an OAI (Origin Access Identity) to get permissions. When you
13+
now can't (for whatever reason) use CloudFront and just have a good old
14+
webserver/reverseproxy like e.g. nginx in front of your application but still
15+
want to directly serve assets from the bucket, you are out of luck because
16+
nginx can't deal with IAM credentials to access a private bucket (and you don't
17+
want to enable public access on your bucket!). Because we had this scenario for
18+
a few customers, we wrote this tool which you can run behind a
19+
webserver/reverseproxy and then configure a location block for '/assets' which
20+
routes the request to this tool, and get nearly the same behaviour as in the
21+
setup with CloudFront (obviously it's not a full blown CDN but you still get
22+
'direct' file access to the bucket without going through your app stack).
23+
24+
25+
## Minimal usage example
626
```
727
export S3PROXY_BUCKET="nameofmybucket"
8-
export S3PROXY_REGION="us-central-1"
9-
export S3PROXY_PORT="3000"
10-
./proxy
28+
./s3-http-proxy
1129
```
1230

1331
## Usage with Docker
1432
```
1533
docker run -e S3PROXY_BUCKET=nameofmybucket -p 3000:3000 --rm -it codemonauts/s3-http-proxy
1634
```
35+
36+
## Configuration
37+
All configuration happens via environment variables.
38+
39+
| Name | Required | Default | Description |
40+
| --------------- | :------: | -------------- | ------------------------------------------------------ |
41+
| S3PROXY_BUCKET | x | - | Name of the bucket |
42+
| S3PROXY_REGION | | "eu-central-1" | Region of the bucket |
43+
| S3PROXY_PORT | | "3000" | Listening port of the application |
44+
| S3PROXY_CACHE | | "" | Set this to a path if you want the files to be cached |
45+
| S3PROXY_LOGGING | | "WARN" | Loglevel ("ERROR","WARN","INFO","DEBUG") |
46+
47+
48+
## Caching
49+
This proxy can locally cache all files from S3 to disk for better performance. To
50+
enable caching just set *S3PROXY_CACHE* to a valid path (relative or absolute
51+
both work). The tool will then only do a HeadRequest to the bucket when it has
52+
the file already in its cache, to see if the file is still up to date
53+
(Comparison of LastModified timestamp). If the file has changed in the bucket
54+
after we downloaded it, it will freshly get downloaded from the Bucket and
55+
replaced on disk before a response is sent.
56+
57+
If you don't need this invalidation check for your files, you can also directly
58+
point your webserver to the cache directory of the plugin, because the files get
59+
saved to disk with the same folder structure as in S3 so they can directly be
60+
read and delivered by a webserver.
61+
62+
63+
With ❤ by [codemonauts](https://codemonauts.com)

filewrapper.go

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package main
2+
3+
import (
4+
"io"
5+
"os"
6+
7+
"github.com/aws/aws-sdk-go/service/s3"
8+
)
9+
10+
// FileWrapper wraps either a local file or an reponse from S3
11+
// It either contains a pointer to a local file and the reponse from a HeadObject request
12+
// or both of these are nil and it only contains an GetObject request
13+
type FileWrapper struct {
14+
File *os.File
15+
GetOutput *s3.GetObjectOutput
16+
HeadOutput *s3.HeadObjectOutput
17+
}
18+
19+
func (obj *FileWrapper) GetContent() io.Reader {
20+
if obj.File != nil {
21+
return obj.File
22+
} else {
23+
return obj.GetOutput.Body
24+
}
25+
}
26+
27+
func (obj *FileWrapper) GetContentType() string {
28+
if obj.GetOutput != nil {
29+
return *obj.GetOutput.ContentType
30+
} else {
31+
return *obj.HeadOutput.ContentType
32+
}
33+
}
34+
35+
func (obj *FileWrapper) GetMetadata() map[string]*string {
36+
if obj.GetOutput != nil {
37+
return obj.GetOutput.Metadata
38+
} else {
39+
return obj.HeadOutput.Metadata
40+
}
41+
}

go.mod

+4-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,7 @@ module github.com/codemonauts/s3-http-proxy
22

33
go 1.16
44

5-
require github.com/aws/aws-sdk-go v1.38.61
5+
require (
6+
github.com/aws/aws-sdk-go v1.38.61
7+
github.com/sirupsen/logrus v1.8.1 // indirect
8+
)

go.sum

+6
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,27 @@ github.com/aws/aws-sdk-go v1.38.61 h1:wizuqQZe0K4iYJ+Slrs0aSQ4P94FAwqBUHwk46Iz5U
22
github.com/aws/aws-sdk-go v1.38.61/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro=
33
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
44
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
5+
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
56
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
67
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
78
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
89
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
910
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
1011
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
1112
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
13+
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
14+
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
1215
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
16+
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
1317
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
1418
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
1519
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
1620
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b h1:uwuIcX0g4Yl1NC5XAz37xsr2lTtcqevgzYNVt49waME=
1721
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
1822
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
1923
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
24+
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
25+
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
2026
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
2127
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
2228
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=

proxy.go

+172-11
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,156 @@
11
package main
22

33
import (
4+
"errors"
45
"fmt"
56
"io"
6-
"log"
77
"net/http"
88
"os"
9+
"path/filepath"
910

1011
"github.com/aws/aws-sdk-go/aws"
1112
"github.com/aws/aws-sdk-go/aws/session"
1213
"github.com/aws/aws-sdk-go/service/s3"
14+
log "github.com/sirupsen/logrus"
1315
)
1416

1517
var (
1618
s3Service *s3.S3
1719
bucketName string
20+
cachePath string
1821
)
1922

23+
// getFile checks if we have a local copy otherwise downloads from S3
24+
func getFile(key string) (FileWrapper, error) {
25+
if cachePath != "" {
26+
log.Debug("Trying to get file from cache")
27+
obj, err := getFileFromCache(key)
28+
29+
// Directly return file from Cache if we didn't got an error
30+
if err == nil {
31+
log.Info("Returning cached file")
32+
return obj, nil
33+
} else {
34+
log.Debug(err)
35+
}
36+
}
37+
38+
obj, err := getFileFromBucket(key)
39+
if err != nil {
40+
return FileWrapper{}, err
41+
}
42+
43+
log.Debug("Returning file from Bucket")
44+
return obj, nil
45+
46+
}
47+
48+
func getFileFromCache(key string) (FileWrapper, error) {
49+
filePath := filepath.Join(cachePath, key)
50+
51+
if fileStat, err := os.Stat(filePath); err == nil {
52+
// file in cache. check expire
53+
headRequest, err := s3Service.HeadObject(&s3.HeadObjectInput{
54+
Bucket: aws.String(bucketName),
55+
Key: aws.String(key),
56+
})
57+
58+
if err != nil {
59+
// We have a local file, but HeadObject returned an error, so we can
60+
// assume that the file no longer exists in the bucket
61+
os.Remove(filePath)
62+
log.Debug("Deleting local file")
63+
return FileWrapper{}, err
64+
}
65+
66+
if fileStat.ModTime().Before(*headRequest.LastModified) {
67+
// Our file is older than the one in the bucket
68+
os.Remove(filePath)
69+
return FileWrapper{}, errors.New("file not up to date")
70+
}
71+
72+
fh, err := os.Open(filePath)
73+
if err != nil {
74+
// Couldn't open cached file
75+
return FileWrapper{}, err
76+
}
77+
78+
return FileWrapper{
79+
File: fh,
80+
HeadOutput: headRequest,
81+
GetOutput: nil,
82+
}, nil
83+
84+
} else {
85+
// File not in cache or otherwise not accessible
86+
return FileWrapper{}, err
87+
}
88+
}
89+
90+
func getFileFromBucket(key string) (FileWrapper, error) {
91+
log.Info("Getting file from Bucket")
92+
93+
obj, err := s3Service.GetObject(&s3.GetObjectInput{
94+
Bucket: aws.String(bucketName),
95+
Key: aws.String(key),
96+
})
97+
98+
if err != nil {
99+
log.Errorf("Error while getting %q from S3: %s\n", key, err.Error())
100+
return FileWrapper{}, err
101+
}
102+
103+
s3File := FileWrapper{
104+
File: nil,
105+
HeadOutput: nil,
106+
GetOutput: obj,
107+
}
108+
109+
if cachePath != "" {
110+
path, err := saveFileToCache(key, obj)
111+
if err != nil {
112+
// We couldn't save the file to the cache but still return the Get response from S3
113+
log.Error(err)
114+
return s3File, nil
115+
}
116+
117+
fh, _ := os.Open(path)
118+
return FileWrapper{
119+
File: fh,
120+
HeadOutput: nil,
121+
GetOutput: obj,
122+
}, nil
123+
124+
}
125+
126+
return s3File, nil
127+
}
128+
129+
// createWithFolders makes sure every parent directory of p exists (creating
// missing ones with mode 0770) and then creates p itself, truncating it if it
// already exists. The returned file is open for reading and writing.
func createWithFolders(p string) (*os.File, error) {
	parent := filepath.Dir(p)
	err := os.MkdirAll(parent, 0770)
	if err != nil {
		return nil, err
	}

	// Same flags and mode that os.Create uses.
	return os.OpenFile(p, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
}
136+
137+
func saveFileToCache(key string, obj *s3.GetObjectOutput) (string, error) {
138+
log.Debug("Saving file to cache")
139+
filePath := filepath.Join(cachePath, key)
140+
141+
outFile, err := createWithFolders(filePath)
142+
if err != nil {
143+
log.Error("Couldn't create cache dir")
144+
return "", err
145+
}
146+
defer outFile.Close()
147+
148+
io.Copy(outFile, obj.Body)
149+
150+
return filePath, nil
151+
152+
}
153+
20154
func handler(w http.ResponseWriter, r *http.Request) {
21155
defer r.Body.Close()
22156

@@ -27,24 +161,30 @@ func handler(w http.ResponseWriter, r *http.Request) {
27161
return
28162
}
29163

30-
input := &s3.GetObjectInput{
31-
Bucket: aws.String(bucketName),
32-
Key: aws.String(key),
33-
}
34-
obj, err := s3Service.GetObject(input)
164+
log.WithFields(log.Fields{
165+
"key": key,
166+
}).Info("Got a request")
167+
168+
obj, err := getFile(key)
35169
if err != nil {
36-
log.Printf("Error while getting %q: %s\n", key, err.Error())
37170
w.WriteHeader(http.StatusForbidden)
38171
w.Write([]byte("Forbidden"))
39172
return
40173
}
41174

42-
defer obj.Body.Close()
175+
// Set correct ContentType
176+
w.Header().Set("Content-Type", obj.GetContentType())
43177

44-
w.Header().Set("Content-Type", *obj.ContentType)
178+
// Check for additional metadata
179+
metadata := obj.GetMetadata()
180+
if len(metadata) > 0 {
181+
for k, v := range metadata {
182+
w.Header().Set(k, *v)
183+
}
184+
}
45185

46186
// Directly copy all bytes from the S3 object into the HTTP response
47-
io.Copy(w, obj.Body)
187+
io.Copy(w, obj.GetContent())
48188
}
49189

50190
func envOrDefault(name string, defaultValue string) string {
@@ -59,18 +199,39 @@ func main() {
59199
region := envOrDefault("S3PROXY_REGION", "eu-central-1")
60200
port := envOrDefault("S3PROXY_PORT", "3000")
61201
bucketName = envOrDefault("S3PROXY_BUCKET", "")
202+
cachePath = envOrDefault("S3PROXY_CACHE", "")
203+
logLevel := envOrDefault("S3PROXY_LOGGING", "WARN")
204+
205+
l, err := log.ParseLevel(logLevel)
206+
if err != nil {
207+
log.Error("Unknown loglevel provided. Defaulting to WARN")
208+
log.SetLevel(log.WarnLevel)
209+
} else {
210+
log.SetLevel(l)
211+
}
62212

63213
if bucketName == "" {
64214
log.Fatal("You need to provide S3PROXY_BUCKET")
65215
}
66216

217+
if cachePath != "" {
218+
// Check if we have write access to the cache directory
219+
testPath := filepath.Join(cachePath, ".testfile")
220+
file, err := createWithFolders(testPath)
221+
if err != nil {
222+
log.Fatal("No write access to the cache dir")
223+
}
224+
defer file.Close()
225+
226+
}
227+
67228
sess := session.Must(session.NewSession(&aws.Config{
68229
Region: aws.String(region),
69230
}))
70231
s3Service = s3.New(sess)
71232

72233
http.HandleFunc("/", handler)
73234

74-
log.Printf("Listening on :%s \n", port)
235+
log.Info("Listening on :%s \n", port)
75236
log.Fatal(http.ListenAndServe(fmt.Sprintf(":%s", port), nil))
76237
}

0 commit comments

Comments
 (0)