Skip to content

Commit

Permalink
Add AWS EC2 Spot functionality to Escalator (#188)
Browse files Browse the repository at this point in the history
* Add AWS EC2 Spot functionality to Escalator

* Fixes for the linter
  • Loading branch information
haugenj authored Apr 22, 2020
1 parent 85fd5fd commit 8d2353e
Show file tree
Hide file tree
Showing 9 changed files with 514 additions and 38 deletions.
2 changes: 2 additions & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ func setupCloudProvider(nodegroups []controller.NodeGroupOptions) cloudprovider.
LaunchTemplateID: n.AWS.LaunchTemplateID,
LaunchTemplateVersion: n.AWS.LaunchTemplateVersion,
FleetInstanceReadyTimeout: n.AWS.FleetInstanceReadyTimeoutDuration(),
Lifecycle: n.AWS.Lifecycle,
InstanceTypeOverrides: n.AWS.InstanceTypeOverrides,
},
})
}
Expand Down
18 changes: 18 additions & 0 deletions docs/configuration/nodegroup.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ node_groups:
fleet_instance_ready_timeout: 1m
launch_template_version: lt-1a2b3c4d
launch_template_id: "1"
lifecycle: on-demand
instance_type_overrides: ["t2.large", "t3.large"]
```
## Options
Expand Down Expand Up @@ -224,3 +226,19 @@ numeric string. This value can be obtained through the AWS EC2 console on the La
`LatestVersionNumber` or `DefaultVersionNumber` field returned from the
[create-launch-template](https://docs.aws.amazon.com/cli/latest/reference/ec2/create-launch-template.html) CLI command
and AWS API call.

### `aws.lifecycle`

Dependent on Launch Template ID being specified.

This optional value is the lifecycle of the instances that will be launched. The accepted values are strings of either
`on-demand` or `spot` to request On-Demand or Spot instances respectively. If no value is specified this will default
to `on-demand`.

### `aws.instance_type_overrides`

Dependent on Launch Template ID being specified.

An optional list of instance types to override the instance type within the launch template. Providing multiple instance
types here increases the likelihood of a Spot request being successful. If omitted the instance type to request will
be taken from the launch template.
8 changes: 7 additions & 1 deletion docs/deployment/aws/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,13 @@ Escalator requires the following IAM policy to be able to properly integrate wit
"Effect": "Allow",
"Action": [
"autoscaling:DescribeAutoScalingGroups",
"autoscaling:DescribeAutoScalingInstances",
"autoscaling:DescribeLaunchConfigurations",
"autoscaling:DescribeTags",
"autoscaling:SetDesiredCapacity",
"autoscaling:TerminateInstanceInAutoScalingGroup",
"ec2:DescribeInstances"
"ec2:DescribeInstances",
"ec2:DescribeLaunchTemplateVersions"
],
"Resource": "*"
}
Expand Down Expand Up @@ -99,3 +103,5 @@ region.
- Do not use
[Auto Scaling Lifecycle Hooks](https://docs.aws.amazon.com/autoscaling/ec2/userguide/lifecycle-hooks.html) for
terminating of instances as Escalator will handle the termination of instances itself.
- If using launch templates, do not use the "network settings" area to configure the security groups. The security
groups should instead be configured via a network interface.
158 changes: 126 additions & 32 deletions pkg/cloudprovider/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,16 @@ import (
v1 "k8s.io/api/core/v1"
)

// ProviderName identifies this module as aws
const ProviderName = "aws"
const (
// ProviderName identifies this module as aws
ProviderName = "aws"
// LifecycleOnDemand is the lifecycle value that requests On-Demand EC2 instances.
// It is also the value used when a node group does not configure a lifecycle.
LifecycleOnDemand = "on-demand"
// LifecycleSpot is the lifecycle value that requests Spot EC2 instances.
LifecycleSpot = "spot"
// batchSize is the number of instance IDs sent per AttachInstances call.
// The AttachInstances API only supports adding 20 instances at a time.
batchSize = 20
)

func instanceToProviderID(instance *autoscaling.Instance) string {
return fmt.Sprintf("aws:///%s/%s", *instance.AvailabilityZone, *instance.InstanceId)
Expand Down Expand Up @@ -236,7 +244,7 @@ func (n *NodeGroup) IncreaseSize(delta int64) error {
return n.setASGDesiredSizeOneShot(delta)
}

log.WithField("asg", n.id).Infof("Scaling with SetDesiredCapacity trategy")
log.WithField("asg", n.id).Infof("Scaling with SetDesiredCapacity strategy")
return n.setASGDesiredSize(n.TargetSize() + delta)

}
Expand Down Expand Up @@ -343,36 +351,27 @@ func (n *NodeGroup) setASGDesiredSize(newSize int64) error {
// setASGDesiredSizeOneShot uses the AWS fleet API to acquire all desired
// capacity in one step and then add it to the existing auto-scaling group.
func (n *NodeGroup) setASGDesiredSizeOneShot(addCount int64) error {
fleet, err := n.provider.ec2Service.CreateFleet(&ec2.CreateFleetInput{
Type: awsapi.String("instant"),
TerminateInstancesWithExpiration: awsapi.Bool(false),
OnDemandOptions: &ec2.OnDemandOptionsRequest{
MinTargetCapacity: awsapi.Int64(addCount),
SingleInstanceType: awsapi.Bool(true),
},
TargetCapacitySpecification: &ec2.TargetCapacitySpecificationRequest{
OnDemandTargetCapacity: awsapi.Int64(addCount),
TotalTargetCapacity: awsapi.Int64(addCount),
DefaultTargetCapacityType: awsapi.String("on-demand"),
},
LaunchTemplateConfigs: []*ec2.FleetLaunchTemplateConfigRequest{
{
LaunchTemplateSpecification: &ec2.FleetLaunchTemplateSpecificationRequest{
LaunchTemplateId: awsapi.String(n.config.AWSConfig.LaunchTemplateID),
Version: awsapi.String(n.config.AWSConfig.LaunchTemplateVersion),
},
},
},
})
// Parse the Escalator args into the correct format for a CreateFleet request, then make the request.
fleetInput, err := createFleetInput(*n, addCount)
if err != nil {
log.Error("Failed setup for CreateFleet call.")
return err
}

// This will hold any launch errors for the fleet. In the case of an
// instant fleet with a single instant type this will indicate that the
// entire fleet failed to launch.
for _, lerr := range fleet.Errors {
return errors.New(*lerr.ErrorMessage)
fleet, err := n.provider.ec2Service.CreateFleet(fleetInput)
if err != nil {
log.Errorf("Failed CreateFleet call. CreateFleetInput: %v", fleetInput)
return err
}

// CreateFleet returns an array of errors with the response. Sometimes errors are present even when instances were
// successfully provisioned. In this case, the min target capacity is the size of the full request, so if any
// instances are present this indicates we got them all and can ignore the errors.
if len(fleet.Instances) == 0 && len(fleet.Errors) > 0 {
for _, err := range fleet.Errors {
log.Error(*err.ErrorMessage)
}
return errors.New(*fleet.Errors[0].ErrorMessage)
}

instances := make([]*string, 0)
Expand Down Expand Up @@ -402,8 +401,6 @@ InstanceReadyLoop:
}
}

// The AttachInstances API only supports adding 20 instances at a time
batchSize := 20
var batch []*string
for batchSize < len(instances) {
instances, batch = instances[batchSize:], instances[0:batchSize:batchSize]
Expand All @@ -413,6 +410,7 @@ InstanceReadyLoop:
InstanceIds: batch,
})
if err != nil {
log.Error("Failed AttachInstances call.")
return err
}
}
Expand All @@ -426,7 +424,12 @@ InstanceReadyLoop:

log.WithField("asg", n.id).Debugf("CurrentSize: %v", n.Size())
log.WithField("asg", n.id).Debugf("CurrentTargetSize: %v", n.TargetSize())
return err
if err != nil {
log.Error("Failed AttachInstances call.")
return err
}

return nil
}

func (n *NodeGroup) allInstancesReady(ids []*string) bool {
Expand All @@ -453,3 +456,94 @@ func (n *NodeGroup) allInstancesReady(ids []*string) bool {

return ready
}

// createFleetInput translates the node group's AWS configuration into an
// ec2.CreateFleetInput asking for addCount instances in a single "instant"
// fleet request.
//
// The configured lifecycle selects the default target capacity type; an empty
// lifecycle falls back to LifecycleOnDemand. The minimum target capacity is set
// to addCount and a single instance type is required, on whichever of the
// OnDemandOptions / SpotOptions sections applies.
//
// An error is returned only when the launch template overrides cannot be built.
func createFleetInput(n NodeGroup, addCount int64) (*ec2.CreateFleetInput, error) {
	capacityType := n.config.AWSConfig.Lifecycle
	if capacityType == "" {
		capacityType = LifecycleOnDemand
	}

	overrides, err := createTemplateOverrides(n)
	if err != nil {
		return nil, err
	}

	templateSpec := &ec2.FleetLaunchTemplateSpecificationRequest{
		LaunchTemplateId: awsapi.String(n.config.AWSConfig.LaunchTemplateID),
		Version:          awsapi.String(n.config.AWSConfig.LaunchTemplateVersion),
	}
	capacitySpec := &ec2.TargetCapacitySpecificationRequest{
		TotalTargetCapacity:       awsapi.Int64(addCount),
		DefaultTargetCapacityType: awsapi.String(capacityType),
	}

	request := &ec2.CreateFleetInput{
		Type:                             awsapi.String("instant"),
		TerminateInstancesWithExpiration: awsapi.Bool(false),
		TargetCapacitySpecification:      capacitySpec,
		LaunchTemplateConfigs: []*ec2.FleetLaunchTemplateConfigRequest{
			{
				LaunchTemplateSpecification: templateSpec,
				Overrides:                   overrides,
			},
		},
	}

	// Exactly one of the two option sections is populated; anything that is
	// not on-demand is treated as a Spot request.
	switch capacityType {
	case LifecycleOnDemand:
		request.OnDemandOptions = &ec2.OnDemandOptionsRequest{
			MinTargetCapacity:  awsapi.Int64(addCount),
			SingleInstanceType: awsapi.Bool(true),
		}
	default:
		request.SpotOptions = &ec2.SpotOptionsRequest{
			MinTargetCapacity:  awsapi.Int64(addCount),
			SingleInstanceType: awsapi.Bool(true),
		}
	}

	return request, nil
}

// createTemplateOverrides builds the launch template overrides for a
// CreateFleet request.
//
// It looks up the node group's ASG to obtain its subnet IDs (the comma
// separated VPCZoneIdentifier) and produces one override per subnet. When
// instance type overrides are configured, one override is produced for every
// (subnet, instance type) combination instead.
//
// An error is returned when the DescribeAutoScalingGroups call fails, when the
// response contains no ASG, or when the ASG reports no subnet IDs.
func createTemplateOverrides(n NodeGroup) ([]*ec2.FleetLaunchTemplateOverridesRequest, error) {
	// Get subnetIDs from the ASG
	describeASGOutput, err := n.provider.service.DescribeAutoScalingGroups(&autoscaling.DescribeAutoScalingGroupsInput{
		AutoScalingGroupNames: []*string{
			awsapi.String(n.id),
		},
	})
	if err != nil {
		log.Errorf("Failed call to DescribeAutoScalingGroups for ASG %v", n.id)
		return nil, err
	}
	if len(describeASGOutput.AutoScalingGroups) == 0 {
		return nil, errors.New("failed to get an ASG from DescribeAutoscalingGroups response")
	}

	// VPCZoneIdentifier is a *string; guard against nil before dereferencing
	// so a missing value yields an error instead of a panic.
	vpcZoneIdentifier := describeASGOutput.AutoScalingGroups[0].VPCZoneIdentifier
	if vpcZoneIdentifier == nil || *vpcZoneIdentifier == "" {
		return nil, errors.New("failed to get any subnetIDs from DescribeAutoscalingGroups response")
	}
	subnetIDs := strings.Split(*vpcZoneIdentifier, ",")

	instanceTypes := n.config.AWSConfig.InstanceTypeOverrides

	// One override per subnet, or per (subnet, instance type) pair.
	perSubnet := len(instanceTypes)
	if perSubnet == 0 {
		perSubnet = 1
	}
	launchTemplateOverrides := make([]*ec2.FleetLaunchTemplateOverridesRequest, 0, len(subnetIDs)*perSubnet)

	for i := range subnetIDs {
		if len(instanceTypes) == 0 {
			// No instance type override: the type comes from the launch template.
			launchTemplateOverrides = append(launchTemplateOverrides, &ec2.FleetLaunchTemplateOverridesRequest{
				SubnetId: &subnetIDs[i],
			})
			continue
		}
		for j := range instanceTypes {
			launchTemplateOverrides = append(launchTemplateOverrides, &ec2.FleetLaunchTemplateOverridesRequest{
				SubnetId:     &subnetIDs[i],
				InstanceType: &instanceTypes[j],
			})
		}
	}

	return launchTemplateOverrides, nil
}
Loading

0 comments on commit 8d2353e

Please sign in to comment.