|
| 1 | +/* |
| 2 | + Copyright 2020 Docker, Inc. |
| 3 | +
|
| 4 | + Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + you may not use this file except in compliance with the License. |
| 6 | + You may obtain a copy of the License at |
| 7 | +
|
| 8 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +
|
| 10 | + Unless required by applicable law or agreed to in writing, software |
| 11 | + distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + See the License for the specific language governing permissions and |
| 14 | + limitations under the License. |
| 15 | +*/ |
| 16 | + |
| 17 | +package ecs |
| 18 | + |
| 19 | +import ( |
| 20 | + "fmt" |
| 21 | + "math" |
| 22 | + "strconv" |
| 23 | + |
| 24 | + "github.com/compose-spec/compose-go/types" |
| 25 | + "github.com/docker/go-units" |
| 26 | +) |
| 27 | + |
| 28 | +type machine struct { |
| 29 | + id string |
| 30 | + cpus float64 |
| 31 | + memory types.UnitBytes |
| 32 | + gpus int64 |
| 33 | +} |
| 34 | + |
| 35 | +type family []machine |
| 36 | + |
| 37 | +var p3family = family{ |
| 38 | + { |
| 39 | + id: "p3.2xlarge", |
| 40 | + cpus: 8, |
| 41 | + memory: 64 * units.GiB, |
| 42 | + gpus: 2, |
| 43 | + }, |
| 44 | + { |
| 45 | + id: "p3.8xlarge", |
| 46 | + cpus: 32, |
| 47 | + memory: 244 * units.GiB, |
| 48 | + gpus: 4, |
| 49 | + }, |
| 50 | + { |
| 51 | + id: "p3.16xlarge", |
| 52 | + cpus: 64, |
| 53 | + memory: 488 * units.GiB, |
| 54 | + gpus: 8, |
| 55 | + }, |
| 56 | +} |
| 57 | + |
| 58 | +type filterFn func(machine) bool |
| 59 | + |
| 60 | +func (f family) filter(fn filterFn) family { |
| 61 | + var filtered family |
| 62 | + for _, machine := range f { |
| 63 | + if fn(machine) { |
| 64 | + filtered = append(filtered, machine) |
| 65 | + } |
| 66 | + } |
| 67 | + return filtered |
| 68 | +} |
| 69 | + |
| 70 | +func (f family) firstOrError(msg string, args ...interface{}) (machine, error) { |
| 71 | + if len(f) == 0 { |
| 72 | + return machine{}, fmt.Errorf(msg, args...) |
| 73 | + } |
| 74 | + return f[0], nil |
| 75 | +} |
| 76 | + |
| 77 | +func guessMachineType(project *types.Project) (string, error) { |
| 78 | + // we select a machine type to match all gpus-bound services requirements |
| 79 | + // once https://github.com/aws/containers-roadmap/issues/631 is implemented we can define dedicated CapacityProviders per service. |
| 80 | + requirements, err := getResourceRequirements(project) |
| 81 | + if err != nil { |
| 82 | + return "", err |
| 83 | + } |
| 84 | + |
| 85 | + instanceType, err := p3family. |
| 86 | + filter(func(m machine) bool { |
| 87 | + return m.memory >= requirements.memory |
| 88 | + }). |
| 89 | + filter(func(m machine) bool { |
| 90 | + return m.cpus >= requirements.cpus |
| 91 | + }). |
| 92 | + filter(func(m machine) bool { |
| 93 | + return m.gpus >= requirements.gpus |
| 94 | + }). |
| 95 | + firstOrError("none of the Amazon EC2 P3 instance types meet the requirements for memory:%d cpu:%f gpus:%d", requirements.memory, requirements.cpus, requirements.gpus) |
| 96 | + if err != nil { |
| 97 | + return "", err |
| 98 | + } |
| 99 | + return instanceType.id, nil |
| 100 | +} |
| 101 | + |
| 102 | +type resourceRequirements struct { |
| 103 | + memory types.UnitBytes |
| 104 | + cpus float64 |
| 105 | + gpus int64 |
| 106 | +} |
| 107 | + |
| 108 | +func getResourceRequirements(project *types.Project) (*resourceRequirements, error) { |
| 109 | + return toResourceRequirementsSlice(project). |
| 110 | + filter(func(requirements *resourceRequirements) bool { |
| 111 | + return requirements.gpus != 0 |
| 112 | + }). |
| 113 | + max() |
| 114 | +} |
| 115 | + |
| 116 | +type eitherRequirementsOrError struct { |
| 117 | + requirements []*resourceRequirements |
| 118 | + err error |
| 119 | +} |
| 120 | + |
| 121 | +func toResourceRequirementsSlice(project *types.Project) eitherRequirementsOrError { |
| 122 | + var requirements []*resourceRequirements |
| 123 | + for _, service := range project.Services { |
| 124 | + r, err := toResourceRequirements(service) |
| 125 | + if err != nil { |
| 126 | + return eitherRequirementsOrError{nil, err} |
| 127 | + } |
| 128 | + requirements = append(requirements, r) |
| 129 | + } |
| 130 | + return eitherRequirementsOrError{requirements, nil} |
| 131 | +} |
| 132 | + |
| 133 | +func (r eitherRequirementsOrError) filter(fn func(*resourceRequirements) bool) eitherRequirementsOrError { |
| 134 | + if r.err != nil { |
| 135 | + return r |
| 136 | + } |
| 137 | + var requirements []*resourceRequirements |
| 138 | + for _, req := range r.requirements { |
| 139 | + if fn(req) { |
| 140 | + requirements = append(requirements, req) |
| 141 | + } |
| 142 | + } |
| 143 | + return eitherRequirementsOrError{requirements, nil} |
| 144 | +} |
| 145 | + |
| 146 | +func toResourceRequirements(service types.ServiceConfig) (*resourceRequirements, error) { |
| 147 | + if service.Deploy == nil { |
| 148 | + return nil, nil |
| 149 | + } |
| 150 | + reservations := service.Deploy.Resources.Reservations |
| 151 | + if reservations == nil { |
| 152 | + return nil, nil |
| 153 | + } |
| 154 | + |
| 155 | + var requiredGPUs int64 |
| 156 | + for _, r := range reservations.GenericResources { |
| 157 | + if r.DiscreteResourceSpec.Kind == "gpus" { |
| 158 | + requiredGPUs = r.DiscreteResourceSpec.Value |
| 159 | + break |
| 160 | + } |
| 161 | + } |
| 162 | + |
| 163 | + var nanocpu float64 |
| 164 | + if reservations.NanoCPUs != "" { |
| 165 | + v, err := strconv.ParseFloat(reservations.NanoCPUs, 64) |
| 166 | + if err != nil { |
| 167 | + return nil, err |
| 168 | + } |
| 169 | + nanocpu = v |
| 170 | + } |
| 171 | + return &resourceRequirements{ |
| 172 | + memory: reservations.MemoryBytes, |
| 173 | + cpus: nanocpu, |
| 174 | + gpus: requiredGPUs, |
| 175 | + }, nil |
| 176 | +} |
| 177 | + |
| 178 | +func (r resourceRequirements) combine(o *resourceRequirements) resourceRequirements { |
| 179 | + if o == nil { |
| 180 | + return r |
| 181 | + } |
| 182 | + return resourceRequirements{ |
| 183 | + memory: maxUnitBytes(r.memory, o.memory), |
| 184 | + cpus: math.Max(r.cpus, o.cpus), |
| 185 | + gpus: maxInt64(r.gpus, o.gpus), |
| 186 | + } |
| 187 | +} |
| 188 | + |
| 189 | +func (r eitherRequirementsOrError) max() (*resourceRequirements, error) { |
| 190 | + if r.err != nil { |
| 191 | + return nil, r.err |
| 192 | + } |
| 193 | + min := resourceRequirements{} |
| 194 | + for _, req := range r.requirements { |
| 195 | + min = min.combine(req) |
| 196 | + } |
| 197 | + return &min, nil |
| 198 | +} |
| 199 | + |
// maxInt64 returns the larger of a and b.
func maxInt64(a, b int64) int64 {
	if b >= a {
		return b
	}
	return a
}
| 206 | + |
| 207 | +func maxUnitBytes(a, b types.UnitBytes) types.UnitBytes { |
| 208 | + if a > b { |
| 209 | + return a |
| 210 | + } |
| 211 | + return b |
| 212 | +} |
0 commit comments