Skip to content

Commit

Permalink
feat: Support karpenter.sh/instance-network-bandwidth (aws#3432)
Browse files Browse the repository at this point in the history
Co-authored-by: Brandon Wagner <[email protected]>
  • Loading branch information
ellistarn and bwagner5 authored Feb 22, 2023
1 parent bc48d71 commit 0115e3b
Show file tree
Hide file tree
Showing 10 changed files with 798 additions and 12 deletions.
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.20

require (
github.com/Pallinder/go-randomdata v1.2.0
github.com/PuerkitoBio/goquery v1.8.1
github.com/avast/retry-go v3.0.0+incompatible
github.com/aws/aws-sdk-go v1.44.195
github.com/aws/karpenter-core v0.25.0
Expand Down Expand Up @@ -32,6 +33,7 @@ require (
contrib.go.opencensus.io/exporter/prometheus v0.4.0 // indirect
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/andybalholm/cascadia v1.3.1 // indirect
github.com/benbjohnson/clock v1.1.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
Expand Down
5 changes: 5 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/Pallinder/go-randomdata v1.2.0 h1:DZ41wBchNRb/0GfsePLiSwb0PHZmT67XY00lCDlaYPg=
github.com/Pallinder/go-randomdata v1.2.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
Expand All @@ -61,6 +63,8 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuy
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0=
github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
Expand Down Expand Up @@ -499,6 +503,7 @@ golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLd
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20210503060351-7fd8e65b6420/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
Expand Down
16 changes: 14 additions & 2 deletions hack/api-code-gen.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
#!/usr/bin/env bash
set -euo pipefail

if [ -z ${ENABLE_GIT_PUSH+x} ];then
if [ -z ${ENABLE_GIT_PUSH+x} ]; then
ENABLE_GIT_PUSH=false
fi

echo "api-code-gen running ENABLE_GIT_PUSH: ${ENABLE_GIT_PUSH}"

# bandwidth regenerates the instance-type network bandwidth table by running
# hack/code/bandwidth_gen.go against GENERATED_FILE, then hands the resulting
# `git diff --stat` output to checkForUpdates.
bandwidth() {
GENERATED_FILE="pkg/cloudprovider/zz_generated.bandwidth.go"
# NOTE(review): presumably the diff stat that checkForUpdates should treat as
# "nothing meaningful changed" (pricing() sets a non-empty value); it is empty
# here -- confirm against checkForUpdates.
NO_UPDATE=''
SUBJECT="Bandwidth"

# The generator takes the output path as its single positional argument.
go run hack/code/bandwidth_gen.go -- "${GENERATED_FILE}"

# Summarise what the regeneration changed in the working tree.
GIT_DIFF=$(git diff --stat "${GENERATED_FILE}")
checkForUpdates "${GIT_DIFF}" "${NO_UPDATE}" "${SUBJECT}" "${GENERATED_FILE}"
}

pricing() {
GENERATED_FILE="pkg/cloudprovider/zz_generated.pricing.go"
NO_UPDATE=$' pkg/cloudprovider/zz_generated.pricing.go | 4 ++--\n 1 file changed, 2 insertions(+), 2 deletions(-)'
Expand Down Expand Up @@ -82,6 +93,7 @@ if [[ $ENABLE_GIT_PUSH == true ]]; then
gitOpenAndPullBranch
fi

bandwidth
pricing
vpcLimits
instanceTypeTestData
instanceTypeTestData
100 changes: 100 additions & 0 deletions hack/code/bandwidth_gen.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"flag"
"fmt"
"go/format"
"log"
"net/http"
"os"
"sort"
"strconv"

"github.com/PuerkitoBio/goquery"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/samber/lo"
)

// ec2UserGuideBaseURL is the common prefix of every documentation page that
// is scraped for network performance data.
const ec2UserGuideBaseURL = "https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/"

// uriSelectors maps each EC2 User Guide page to the CSS selector (an element
// id) that main uses as its starting point for locating the network
// performance table on that page.
var uriSelectors = map[string]string{
	ec2UserGuideBaseURL + "accelerated-computing-instances.html": "#gpu-network-performance",
	ec2UserGuideBaseURL + "compute-optimized-instances.html":     "#compute-network-performance",
	ec2UserGuideBaseURL + "general-purpose-instances.html":       "#general-purpose-network-performance",
	ec2UserGuideBaseURL + "memory-optimized-instances.html":      "#memory-network-perf",
	ec2UserGuideBaseURL + "storage-optimized-instances.html":     "#storage-network-performance",
}

// fileFormat is the template for the generated Go source file. Its single %s
// verb is filled with the body built in main, and the result is passed through
// format.Source before being written out.
// NOTE(review): the template's header mentions an --instance-types CLI flag,
// but this program defines no flags and only reads one positional argument --
// confirm whether that line should be dropped from the generated header.
const fileFormat = `
package cloudprovider
// GENERATED FILE. DO NOT EDIT DIRECTLY.
// Update hack/code/bandwidth_gen.go and re-generate to edit
// You can add instance types by adding to the --instance-types CLI flag
var (
InstanceTypeBandwidthMegabits = map[string]int64{
%s
}
)
`
func main() {
flag.Parse()
if flag.NArg() != 1 {
log.Fatalf("Usage: `bandwidth_gen.go pkg/cloudprovider/zz_generated.pricing.go`")
}

bandwidth := map[string]int64{}

for uri, selector := range uriSelectors {
response := lo.Must(http.Get(uri))
defer response.Body.Close()

doc := lo.Must(goquery.NewDocumentFromReader(response.Body))
for _, row := range doc.Find(selector).Next().Next().Next().Find("tbody").Find("tr").Nodes {
instanceTypeData := row.FirstChild.NextSibling.FirstChild.FirstChild.Data
bandwidthData := row.FirstChild.NextSibling.NextSibling.NextSibling.FirstChild.Data
bandwidth[instanceTypeData] = int64(lo.Must(strconv.ParseFloat(bandwidthData, 64)) * 1000)
}
}

sess := session.Must(session.NewSession())
ec2api := ec2.New(sess)
instanceTypesOutput := lo.Must(ec2api.DescribeInstanceTypes(&ec2.DescribeInstanceTypesInput{}))
allInstanceTypes := lo.Map(instanceTypesOutput.InstanceTypes, func(info *ec2.InstanceTypeInfo, _ int) string { return *info.InstanceType })

instanceTypes := lo.Keys(bandwidth)
// 2d sort for readability
sort.Strings(instanceTypes)
sort.SliceStable(instanceTypes, func(i, j int) bool {
return bandwidth[instanceTypes[i]] < bandwidth[instanceTypes[j]]
})

// Generate body
var body string
for _, instanceType := range lo.Without(allInstanceTypes, instanceTypes...) {
body += fmt.Sprintf("// %s is not available in https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-network-bandwidth.html\n", instanceType)
}
for _, instanceType := range instanceTypes {
body += fmt.Sprintf("\t\"%s\": %d,\n", instanceType, bandwidth[instanceType])
}

// Format and print to the file
formatted := lo.Must(format.Source([]byte(fmt.Sprintf(fileFormat, body))))
file := lo.Must(os.Create(flag.Args()[0]))
lo.Must(file.Write(formatted))
file.Close()
}
2 changes: 2 additions & 0 deletions pkg/apis/v1alpha1/register.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ var (
LabelInstanceSize = LabelDomain + "/instance-size"
LabelInstanceCPU = LabelDomain + "/instance-cpu"
LabelInstanceMemory = LabelDomain + "/instance-memory"
LabelInstanceNetworkBandwidth = LabelDomain + "/instance-network-bandwidth"
LabelInstancePods = LabelDomain + "/instance-pods"
LabelInstanceGPUName = LabelDomain + "/instance-gpu-name"
LabelInstanceGPUManufacturer = LabelDomain + "/instance-gpu-manufacturer"
Expand Down Expand Up @@ -106,6 +107,7 @@ func init() {
LabelInstanceLocalNVME,
LabelInstanceCPU,
LabelInstanceMemory,
LabelInstanceNetworkBandwidth,
LabelInstancePods,
LabelInstanceGPUName,
LabelInstanceGPUManufacturer,
Expand Down
5 changes: 5 additions & 0 deletions pkg/cloudprovider/instancetype.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ func computeRequirements(ctx context.Context, info *ec2.InstanceTypeInfo, offeri
// Well Known to AWS
scheduling.NewRequirement(v1alpha1.LabelInstanceCPU, v1.NodeSelectorOpIn, fmt.Sprint(aws.Int64Value(info.VCpuInfo.DefaultVCpus))),
scheduling.NewRequirement(v1alpha1.LabelInstanceMemory, v1.NodeSelectorOpIn, fmt.Sprint(aws.Int64Value(info.MemoryInfo.SizeInMiB))),
scheduling.NewRequirement(v1alpha1.LabelInstanceNetworkBandwidth, v1.NodeSelectorOpDoesNotExist),
scheduling.NewRequirement(v1alpha1.LabelInstancePods, v1.NodeSelectorOpIn, fmt.Sprint(pods(ctx, info, amiFamily, kc))),
scheduling.NewRequirement(v1alpha1.LabelInstanceCategory, v1.NodeSelectorOpDoesNotExist),
scheduling.NewRequirement(v1alpha1.LabelInstanceFamily, v1.NodeSelectorOpDoesNotExist),
Expand Down Expand Up @@ -105,6 +106,10 @@ func computeRequirements(ctx context.Context, info *ec2.InstanceTypeInfo, offeri
if info.InstanceStorageInfo != nil && aws.StringValue(info.InstanceStorageInfo.NvmeSupport) != ec2.EphemeralNvmeSupportUnsupported {
requirements[v1alpha1.LabelInstanceLocalNVME].Insert(fmt.Sprint(aws.Int64Value(info.InstanceStorageInfo.TotalSizeInGB)))
}
// Network bandwidth
if bandwidth, ok := InstanceTypeBandwidthMegabits[aws.StringValue(info.InstanceType)]; ok {
requirements[v1alpha1.LabelInstanceNetworkBandwidth].Insert(fmt.Sprint(bandwidth))
}
// GPU Labels
if info.GpuInfo != nil && len(info.GpuInfo.Gpus) == 1 {
gpu := info.GpuInfo.Gpus[0]
Expand Down
35 changes: 26 additions & 9 deletions pkg/cloudprovider/instancetypes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,29 +46,46 @@ import (
var _ = Describe("Instance Types", func() {
It("should support instance type labels", func() {
ExpectApplied(ctx, env.Client, provisioner, nodeTemplate)
var pods []*v1.Pod
for key, value := range map[string]string{

nodeSelector := map[string]string{
// Well known
v1alpha5.ProvisionerNameLabelKey: provisioner.Name,
v1.LabelTopologyRegion: "",
v1.LabelTopologyZone: "test-zone-1a",
v1.LabelInstanceTypeStable: "g4dn.8xlarge",
v1.LabelOSStable: "linux",
v1.LabelArchStable: "amd64",
v1alpha5.LabelCapacityType: "on-demand",
// Well Known to AWS
v1alpha1.LabelInstanceHypervisor: "nitro",
v1alpha1.LabelInstanceEncryptionInTransitSupported: "true",
v1alpha1.LabelInstanceCategory: "g",
v1alpha1.LabelInstanceFamily: "g4dn",
v1alpha1.LabelInstanceGeneration: "4",
v1alpha1.LabelInstanceFamily: "g4dn",
v1alpha1.LabelInstanceSize: "8xlarge",
v1alpha1.LabelInstanceCPU: "32",
v1alpha1.LabelInstanceMemory: "131072",
v1alpha1.LabelInstanceNetworkBandwidth: "50000",
v1alpha1.LabelInstancePods: "58",
v1alpha1.LabelInstanceGPUName: "t4",
v1alpha1.LabelInstanceGPUManufacturer: "nvidia",
v1alpha1.LabelInstanceGPUCount: "1",
v1alpha1.LabelInstanceGPUMemory: "16384",
v1alpha1.LabelInstanceLocalNVME: "900",
} {
pods = append(pods, coretest.UnschedulablePod(coretest.PodOptions{NodeSelector: map[string]string{key: value}}))
}
ExpectProvisioned(ctx, env.Client, cluster, prov, pods...)
for _, pod := range pods {
ExpectScheduled(ctx, env.Client, pod)
// Deprecated Labels
v1.LabelFailureDomainBetaRegion: "",
v1.LabelFailureDomainBetaZone: "test-zone-1a",
"beta.kubernetes.io/arch": "amd64",
"beta.kubernetes.io/os": "linux",
v1.LabelInstanceType: "g4dn.8xlarge",
"topology.ebs.csi.aws.com/zone": "test-zone-1a",
}

// Ensure that we're exercising all well known labels
Expect(lo.Keys(nodeSelector)).To(ContainElements(append(v1alpha5.WellKnownLabels.UnsortedList(), lo.Keys(v1alpha5.NormalizedLabels)...)))
pod := coretest.UnschedulablePod(coretest.PodOptions{NodeSelector: nodeSelector})
ExpectProvisioned(ctx, env.Client, cluster, prov, pod)
ExpectScheduled(ctx, env.Client, pod)
})
It("should not launch AWS Pod ENI on a t3", func() {
ExpectApplied(ctx, env.Client, provisioner, nodeTemplate)
Expand Down
Loading

0 comments on commit 0115e3b

Please sign in to comment.