Skip to content
This repository was archived by the owner on Nov 27, 2023. It is now read-only.

Commit 101e155

Browse files
committed
Some more functional design
Signed-off-by: Nicolas De Loof <[email protected]>
1 parent dbe87e2 commit 101e155

File tree

2 files changed

+121
-51
lines changed

2 files changed

+121
-51
lines changed

ecs/gpu.go

Lines changed: 109 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package ecs
1818

1919
import (
2020
"fmt"
21+
"math"
2122
"strconv"
2223

2324
"github.com/compose-spec/compose-go/types"
@@ -74,69 +75,138 @@ func (f family) firstOrError(msg string, args ...interface{}) (machine, error) {
7475
}
7576

7677
func guessMachineType(project *types.Project) (string, error) {
77-
// we select a machine type to match all gpu-bound services requirements
78+
// we select a machine type that matches the requirements of all GPU-bound services
7879
// once https://github.com/aws/containers-roadmap/issues/631 is implemented we can define dedicated CapacityProviders per service.
79-
minMemory, minCPU, minGPU, err := getResourceRequirements(project)
80+
requirements, err := getResourceRequirements(project)
8081
if err != nil {
8182
return "", err
8283
}
8384

8485
instanceType, err := p3family.
8586
filter(func(m machine) bool {
86-
return m.memory >= minMemory
87+
return m.memory >= requirements.memory
8788
}).
8889
filter(func(m machine) bool {
89-
return m.cpus >= minCPU
90+
return m.cpus >= requirements.cpus
9091
}).
9192
filter(func(m machine) bool {
92-
return m.gpus >= minGPU
93+
return m.gpus >= requirements.gpus
9394
}).
94-
firstOrError("none of the AWS p3 machines match requirement for memory:%d cpu:%f gpu:%d", minMemory, minCPU, minGPU)
95+
firstOrError("none of the AWS p3 machines match requirement for memory:%d cpu:%f gpus:%d", requirements.memory, requirements.cpus, requirements.gpus)
9596
if err != nil {
9697
return "", err
9798
}
9899
return instanceType.id, nil
99100
}
100101

101-
func getResourceRequirements(project *types.Project) (types.UnitBytes, float64, int64, error) {
102-
var minMemory types.UnitBytes
103-
var minCPU float64
104-
var minGPU int64
102+
type resourceRequirements struct {
103+
memory types.UnitBytes
104+
cpus float64
105+
gpus int64
106+
}
107+
108+
func getResourceRequirements(project *types.Project) (*resourceRequirements, error) {
109+
return toResourceRequirementsSlice(project).
110+
filter(func(requirements *resourceRequirements) bool {
111+
return requirements.gpus != 0
112+
}).
113+
max()
114+
}
115+
116+
type eitherRequirementsOrError struct {
117+
requirements []*resourceRequirements
118+
err error
119+
}
120+
121+
func toResourceRequirementsSlice(project *types.Project) eitherRequirementsOrError {
122+
var requirements []*resourceRequirements
105123
for _, service := range project.Services {
106-
if service.Deploy == nil {
107-
continue
108-
}
109-
reservations := service.Deploy.Resources.Reservations
110-
if reservations == nil {
111-
continue
124+
r, err := toResourceRequirements(service)
125+
if err != nil {
126+
return eitherRequirementsOrError{nil, err}
112127
}
128+
requirements = append(requirements, r)
129+
}
130+
return eitherRequirementsOrError{requirements, nil}
131+
}
113132

114-
var requiredGPUs int64
115-
for _, r := range reservations.GenericResources {
116-
if r.DiscreteResourceSpec.Kind == "gpu" {
117-
requiredGPUs = r.DiscreteResourceSpec.Value
118-
break
119-
}
120-
}
121-
if requiredGPUs == 0 {
122-
continue
123-
}
124-
if requiredGPUs > minGPU {
125-
minGPU = requiredGPUs
133+
func (r eitherRequirementsOrError) filter(fn func(*resourceRequirements) bool) eitherRequirementsOrError {
134+
if r.err != nil {
135+
return r
136+
}
137+
var requirements []*resourceRequirements
138+
for _, req := range r.requirements {
139+
if fn(req) {
140+
requirements = append(requirements, req)
126141
}
142+
}
143+
return eitherRequirementsOrError{requirements, nil}
144+
}
127145

128-
if reservations.MemoryBytes > minMemory {
129-
minMemory = reservations.MemoryBytes
146+
func toResourceRequirements(service types.ServiceConfig) (*resourceRequirements, error) {
147+
if service.Deploy == nil {
148+
return nil, nil
149+
}
150+
reservations := service.Deploy.Resources.Reservations
151+
if reservations == nil {
152+
return nil, nil
153+
}
154+
155+
var requiredGPUs int64
156+
for _, r := range reservations.GenericResources {
157+
if r.DiscreteResourceSpec.Kind == "gpus" {
158+
requiredGPUs = r.DiscreteResourceSpec.Value
159+
break
130160
}
131-
if reservations.NanoCPUs != "" {
132-
nanocpu, err := strconv.ParseFloat(reservations.NanoCPUs, 64)
133-
if err != nil {
134-
return 0, 0, 0, err
135-
}
136-
if nanocpu > minCPU {
137-
minCPU = nanocpu
138-
}
161+
}
162+
163+
var nanocpu float64
164+
if reservations.NanoCPUs != "" {
165+
v, err := strconv.ParseFloat(reservations.NanoCPUs, 64)
166+
if err != nil {
167+
return nil, err
139168
}
169+
nanocpu = v
170+
}
171+
return &resourceRequirements{
172+
memory: reservations.MemoryBytes,
173+
cpus: nanocpu,
174+
gpus: requiredGPUs,
175+
}, nil
176+
}
177+
178+
func (r resourceRequirements) combine(o *resourceRequirements) resourceRequirements {
179+
if o == nil {
180+
return r
181+
}
182+
return resourceRequirements{
183+
memory: maxUnitBytes(r.memory, o.memory),
184+
cpus: math.Max(r.cpus, o.cpus),
185+
gpus: maxInt64(r.gpus, o.gpus),
186+
}
187+
}
188+
189+
func (r eitherRequirementsOrError) max() (*resourceRequirements, error) {
190+
if r.err != nil {
191+
return nil, r.err
192+
}
193+
min := resourceRequirements{}
194+
for _, req := range r.requirements {
195+
min = min.combine(req)
196+
}
197+
return &min, nil
198+
}
199+
200+
// maxInt64 returns the larger of a and b.
func maxInt64(a, b int64) int64 {
	if a > b {
		return a
	}
	return b
}
206+
207+
func maxUnitBytes(a, b types.UnitBytes) types.UnitBytes {
208+
if a > b {
209+
return a
140210
}
141-
return minMemory, minCPU, minGPU, nil
211+
return b
142212
}

ecs/gpu_test.go

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,71 +28,71 @@ func TestGuessMachineType(t *testing.T) {
2828
wantErr bool
2929
}{
3030
{
31-
name: "1-gpu",
31+
name: "1-gpus",
3232
yaml: `
3333
services:
3434
learning:
35-
image: tensorflow/tensorflow:latest-gpu
35+
image: tensorflow/tensorflow:latest-gpus
3636
deploy:
3737
resources:
3838
reservations:
3939
generic_resources:
4040
- discrete_resource_spec:
41-
kind: gpu
41+
kind: gpus
4242
value: 1
4343
`,
4444
want: "p3.2xlarge",
4545
wantErr: false,
4646
},
4747
{
48-
name: "4-gpu",
48+
name: "4-gpus",
4949
yaml: `
5050
services:
5151
learning:
52-
image: tensorflow/tensorflow:latest-gpu
52+
image: tensorflow/tensorflow:latest-gpus
5353
deploy:
5454
resources:
5555
reservations:
5656
generic_resources:
5757
- discrete_resource_spec:
58-
kind: gpu
58+
kind: gpus
5959
value: 4
6060
`,
6161
want: "p3.8xlarge",
6262
wantErr: false,
6363
},
6464
{
65-
name: "1-gpu, high-memory",
65+
name: "1-gpus, high-memory",
6666
yaml: `
6767
services:
6868
learning:
69-
image: tensorflow/tensorflow:latest-gpu
69+
image: tensorflow/tensorflow:latest-gpus
7070
deploy:
7171
resources:
7272
reservations:
7373
memory: 300Gb
7474
generic_resources:
7575
- discrete_resource_spec:
76-
kind: gpu
76+
kind: gpus
7777
value: 2
7878
`,
7979
want: "p3.16xlarge",
8080
wantErr: false,
8181
},
8282
{
83-
name: "1-gpu, high-cpu",
83+
name: "1-gpus, high-cpu",
8484
yaml: `
8585
services:
8686
learning:
87-
image: tensorflow/tensorflow:latest-gpu
87+
image: tensorflow/tensorflow:latest-gpus
8888
deploy:
8989
resources:
9090
reservations:
9191
memory: 32Gb
9292
cpus: "32"
9393
generic_resources:
9494
- discrete_resource_spec:
95-
kind: gpu
95+
kind: gpus
9696
value: 2
9797
`,
9898
want: "p3.8xlarge",

0 commit comments

Comments
 (0)